15 clGetPlatformInfo (
id, CL_PLATFORM_NAME, 0, NULL, &size);
19 clGetPlatformInfo (
id, CL_PLATFORM_NAME, size, const_cast<char*> (result.data ()), NULL);
26 clGetDeviceInfo (
id, CL_DEVICE_NAME, 0, NULL, &size);
30 clGetDeviceInfo (
id, CL_DEVICE_NAME, size, const_cast<char*> (result.data ()), NULL);
36 if (error != CL_SUCCESS) {
37 std::string error_message =
"";
40 case CL_DEVICE_NOT_FOUND:
41 error_message =
"CL_DEVICE_NOT_FOUND";
43 case CL_DEVICE_NOT_AVAILABLE:
44 error_message =
"CL_DEVICE_NOT_AVAILABLE";
46 case CL_COMPILER_NOT_AVAILABLE:
47 error_message =
"CL_COMPILER_NOT_AVAILABLE";
49 case CL_MEM_OBJECT_ALLOCATION_FAILURE:
50 error_message =
"CL_MEM_OBJECT_ALLOCATION_FAILURE";
52 case CL_OUT_OF_RESOURCES:
53 error_message =
"CL_OUT_OF_RESOURCES";
55 case CL_OUT_OF_HOST_MEMORY:
56 error_message =
"CL_OUT_OF_HOST_MEMORY";
58 case CL_PROFILING_INFO_NOT_AVAILABLE:
59 error_message =
"CL_PROFILING_INFO_NOT_AVAILABLE";
61 case CL_MEM_COPY_OVERLAP:
62 error_message =
"CL_MEM_COPY_OVERLAP";
64 case CL_IMAGE_FORMAT_MISMATCH:
65 error_message =
"CL_IMAGE_FORMAT_MISMATCH";
67 case CL_IMAGE_FORMAT_NOT_SUPPORTED:
68 error_message =
"CL_IMAGE_FORMAT_NOT_SUPPORTED";
70 case CL_BUILD_PROGRAM_FAILURE:
71 error_message =
"CL_BUILD_PROGRAM_FAILURE";
74 error_message =
"CL_MAP_FAILURE";
76 case CL_MISALIGNED_SUB_BUFFER_OFFSET:
77 error_message =
"CL_MISALIGNED_SUB_BUFFER_OFFSET";
79 case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
80 error_message =
"CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
82 case CL_COMPILE_PROGRAM_FAILURE:
83 error_message =
"CL_COMPILE_PROGRAM_FAILURE";
85 case CL_LINKER_NOT_AVAILABLE:
86 error_message =
"CL_LINKER_NOT_AVAILABLE";
88 case CL_LINK_PROGRAM_FAILURE:
89 error_message =
"CL_LINK_PROGRAM_FAILURE";
91 case CL_DEVICE_PARTITION_FAILED:
92 error_message =
"CL_DEVICE_PARTITION_FAILED";
94 case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
95 error_message =
"CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
97 case CL_INVALID_VALUE:
98 error_message =
"CL_INVALID_VALUE";
100 case CL_INVALID_DEVICE_TYPE:
101 error_message =
"CL_INVALID_DEVICE_TYPE";
103 case CL_INVALID_PLATFORM:
104 error_message =
"CL_INVALID_PLATFORM";
106 case CL_INVALID_DEVICE:
107 error_message =
"CL_INVALID_DEVICE";
109 case CL_INVALID_CONTEXT:
110 error_message =
"CL_INVALID_CONTEXT";
112 case CL_INVALID_QUEUE_PROPERTIES:
113 error_message =
"CL_INVALID_QUEUE_PROPERTIES";
115 case CL_INVALID_COMMAND_QUEUE:
116 error_message =
"CL_INVALID_COMMAND_QUEUE";
118 case CL_INVALID_HOST_PTR:
119 error_message =
"CL_INVALID_HOST_PTR";
121 case CL_INVALID_MEM_OBJECT:
122 error_message =
"CL_INVALID_MEM_OBJECT";
124 case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
125 error_message =
"CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
127 case CL_INVALID_BUILD_OPTIONS:
128 error_message =
"CL_INVALID_BUILD_OPTIONS";
130 case CL_INVALID_PROGRAM:
131 error_message =
"CL_INVALID_PROGRAM";
133 case CL_INVALID_BINARY:
134 error_message =
"CL_INVALID_BINARY";
136 case CL_INVALID_PROGRAM_EXECUTABLE:
137 error_message =
"CL_INVALID_PROGRAM_EXECUTABLE";
139 case CL_INVALID_KERNEL_NAME:
140 error_message =
"CL_INVALID_KERNEL_NAME";
142 case CL_INVALID_KERNEL_DEFINITION:
143 error_message =
"CL_INVALID_KERNEL_DEFINITION";
145 case CL_INVALID_KERNEL:
146 error_message =
"CL_INVALID_KERNEL";
148 case CL_INVALID_ARG_INDEX:
149 error_message =
"CL_INVALID_ARG_INDEX";
151 case CL_INVALID_ARG_VALUE:
152 error_message =
"CL_INVALID_ARG_VALUE";
154 case CL_INVALID_ARG_SIZE:
155 error_message =
"CL_INVALID_ARG_SIZE";
157 case CL_INVALID_KERNEL_ARGS:
158 error_message =
"CL_INVALID_KERNEL_ARGS";
160 case CL_INVALID_WORK_DIMENSION:
161 error_message =
"CL_INVALID_WORK_DIMENSION";
163 case CL_INVALID_WORK_GROUP_SIZE:
164 error_message =
"CL_INVALID_WORK_GROUP_SIZE";
166 case CL_INVALID_WORK_ITEM_SIZE:
167 error_message =
"CL_INVALID_WORK_ITEM_SIZE";
169 case CL_INVALID_GLOBAL_OFFSET:
170 error_message =
"CL_INVALID_GLOBAL_OFFSET";
172 case CL_INVALID_EVENT_WAIT_LIST:
173 error_message =
"CL_INVALID_EVENT_WAIT_LIST";
175 case CL_INVALID_EVENT:
176 error_message =
"CL_INVALID_EVENT";
178 case CL_INVALID_OPERATION:
179 error_message =
"CL_INVALID_OPERATION";
181 case CL_INVALID_GL_OBJECT:
182 error_message =
"CL_INVALID_GL_OBJECT";
184 case CL_INVALID_BUFFER_SIZE:
185 error_message =
"CL_INVALID_BUFFER_SIZE";
187 case CL_INVALID_MIP_LEVEL:
188 error_message =
"CL_INVALID_MIP_LEVEL";
190 case CL_INVALID_GLOBAL_WORK_SIZE:
191 error_message =
"CL_INVALID_GLOBAL_WORK_SIZE";
193 case CL_INVALID_PROPERTY:
194 error_message =
"CL_INVALID_PROPERTY";
196 case CL_INVALID_IMAGE_DESCRIPTOR:
197 error_message =
"CL_INVALID_IMAGE_DESCRIPTOR";
199 case CL_INVALID_COMPILER_OPTIONS:
200 error_message =
"CL_INVALID_COMPILER_OPTIONS";
202 case CL_INVALID_LINKER_OPTIONS:
203 error_message =
"CL_INVALID_LINKER_OPTIONS";
205 case CL_INVALID_DEVICE_PARTITION_COUNT:
206 error_message =
"CL_INVALID_DEVICE_PARTITION_COUNT";
210 ROS_ERROR(
"OpenCL call failed with error: %d: %s", error, error_message.c_str());
216 std::ifstream in (name);
217 std::string result((std::istreambuf_iterator<char>(in)), std::istreambuf_iterator<char>());
222 size_t lengths [1] = { source.size () };
223 const char* sources [1] = { source.data () };
226 cl_program program_ = clCreateProgramWithSource(context, 1, sources, lengths, &error);
235 cl_uint platformIdCount = 0;
236 clGetPlatformIDs (0, NULL, &platformIdCount);
238 if (platformIdCount == 0) {
239 ROS_ERROR(
"No OpenCL platform found");
243 ROS_INFO(
"Found %d platform(s)", platformIdCount);
246 std::vector<cl_platform_id> platformIds (platformIdCount);
247 clGetPlatformIDs (platformIdCount, platformIds.data (), NULL);
249 for (cl_uint i = 0; i < platformIdCount; ++i) {
250 ROS_INFO(
"\t (%d) : %s", i+1,
getPlatformName (platformIds [i]).c_str());
253 cl_uint deviceIdCount = 0;
254 clGetDeviceIDs (platformIds [0], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceIdCount);
256 if (deviceIdCount == 0) {
257 ROS_ERROR(
"No OpenCL devices found");
261 ROS_INFO(
"Found %d device(s)", deviceIdCount);
264 deviceIds = std::vector<cl_device_id>(deviceIdCount);
265 clGetDeviceIDs (platformIds [0], CL_DEVICE_TYPE_ALL, deviceIdCount,
deviceIds.data(), NULL);
267 for (cl_uint i = 0; i < deviceIdCount; ++i) {
271 const cl_context_properties contextProperties [] = {CL_CONTEXT_PLATFORM,
reinterpret_cast<cl_context_properties
> (platformIds [0]), 0, 0};
273 cl_int error = CL_SUCCESS;
274 context = clCreateContext (contextProperties, deviceIdCount,
deviceIds.data (), NULL, NULL, &error);
277 ROS_INFO(
"Context created");
283 ROS_INFO(
"Program built");
285 kernel = clCreateKernel (
program, kernel_function.c_str(), &error);
288 ROS_INFO(
"Kernel created");
306 size_t sz = msg.data.size();
308 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, sz, NULL, &error);
310 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
311 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
313 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, sz, &msg.data[0], 0, NULL, NULL);
320 checkError (clEnqueueNDRangeKernel (queue,
kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
322 clWaitForEvents(1, &gpuExec);
324 uint8_t *result = (uint8_t *) malloc(sz);
325 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sz, result, 0, NULL, NULL));
327 sensor_msgs::PointCloud2 res = sensor_msgs::PointCloud2(msg);
328 res.data.assign(result, result+sz);
330 clReleaseCommandQueue (queue);
331 clReleaseMemObject(buffer);
332 clReleaseEvent(gpuExec);
339 size_t sz = msg->data.size();
341 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, sz, NULL, &error);
343 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
344 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
346 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, sz, &msg->data[0], 0, NULL, NULL);
353 checkError (clEnqueueNDRangeKernel (queue,
kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
355 clWaitForEvents(1, &gpuExec);
357 uint8_t *result = (uint8_t *) malloc(sz);
358 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sz, result, 0, NULL, NULL));
360 msg->data.assign(result, result+sz);
362 clReleaseCommandQueue (queue);
363 clReleaseMemObject(buffer);
364 clReleaseEvent(gpuExec);
369 size_t sz = msg.ranges.size();
370 cl_int typesz =
sizeof(float) * sz;
372 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
374 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
375 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
377 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &msg.ranges[0], 0, NULL, NULL);
384 checkError (clEnqueueNDRangeKernel (queue,
kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
386 clWaitForEvents(1, &gpuExec);
388 float *result = (
float *) malloc(typesz);
389 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
391 sensor_msgs::LaserScan res = sensor_msgs::LaserScan(msg);
392 res.ranges.assign(result, result+sz);
394 clReleaseCommandQueue (queue);
395 clReleaseMemObject(buffer);
396 clReleaseEvent(gpuExec);
403 size_t sz = msg->ranges.size();
404 cl_int typesz =
sizeof(float) * sz;
406 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
408 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
409 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
411 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &msg->ranges[0], 0, NULL, NULL);
418 checkError (clEnqueueNDRangeKernel (queue,
kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
420 clWaitForEvents(1, &gpuExec);
422 float *result = (
float *) malloc(typesz);
423 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
425 msg->ranges.assign(result, result+sz);
427 clReleaseCommandQueue (queue);
428 clReleaseMemObject(buffer);
429 clReleaseEvent(gpuExec);
434 size_t sz = msg.data.size();
436 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, sz, NULL, &error);
438 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
439 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
441 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, sz, &msg.data[0], 0, NULL, NULL);
448 checkError (clEnqueueNDRangeKernel (queue,
kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
450 clWaitForEvents(1, &gpuExec);
452 uint8_t *result = (uint8_t *) malloc(sz);
453 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sz, result, 0, NULL, NULL));
455 sensor_msgs::Image res = sensor_msgs::Image(msg);
456 res.data.assign(result, result+sz);
458 clReleaseCommandQueue (queue);
459 clReleaseMemObject(buffer);
460 clReleaseEvent(gpuExec);
467 size_t sz = msg->data.size();
469 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, sz, NULL, &error);
471 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
472 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
474 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, sz, &msg->data[0], 0, NULL, NULL);
481 checkError (clEnqueueNDRangeKernel (queue,
kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
483 clWaitForEvents(1, &gpuExec);
485 uint8_t *result = (uint8_t *) malloc(sz);
486 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sz, result, 0, NULL, NULL));
488 msg->data.assign(result, result+sz);
490 clReleaseCommandQueue (queue);
491 clReleaseMemObject(buffer);
492 clReleaseEvent(gpuExec);
497 size_t sz = v.size();
498 cl_int typesz =
sizeof(float) * sz;
499 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
502 ROS_WARN(
"buffers_size includes more elements than needed! Using only the first...");
507 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
509 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
510 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
512 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
519 ROS_WARN(
"global_work_size includes more elements than needed! Using only the first...");
526 checkError (clEnqueueNDRangeKernel (queue,
kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
528 clWaitForEvents(1, &gpuExec);
530 float *result = (
float *) malloc(typesz);
531 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
533 std::vector<float> res = std::vector<float>();
535 if (params != NULL and params->
buffers_size.size() > 0){
539 res.assign(result, result+sz);
542 clReleaseCommandQueue (queue);
543 clReleaseMemObject(buffer);
544 clReleaseEvent(gpuExec);
551 size_t sz = v->size();
552 cl_int typesz =
sizeof(float) * sz;
553 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Surplus entries in params->buffers_size are ignored; only the first one is used.
556 ROS_WARN(
"buffers_size includes more elements than needed! Using only the first...");
561 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
563 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
564 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
566 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
573 ROS_WARN(
"global_work_size includes more elements than needed! Using only the first...");
580 checkError (clEnqueueNDRangeKernel (queue,
kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
582 clWaitForEvents(1, &gpuExec);
584 float *result = (
float *) malloc(typesz);
585 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
587 if (params != NULL and params->
buffers_size.size() > 0){
591 v->assign(result, result+sz);
594 clReleaseCommandQueue (queue);
595 clReleaseMemObject(buffer);
596 clReleaseEvent(gpuExec);
601 size_t sz = v.size();
602 cl_int typesz =
sizeof(double) * sz;
603 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
606 ROS_WARN(
"buffer_size includes more elements than needed! Using only the first...");
611 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
613 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
614 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
616 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
623 ROS_WARN(
"global_work_size includes more elements than needed! Using only the first...");
630 checkError (clEnqueueNDRangeKernel (queue,
kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
632 clWaitForEvents(1, &gpuExec);
634 double *result = (
double *) malloc(typesz);
635 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
637 std::vector<double> res = std::vector<double>();
639 if (params != NULL and params->
buffers_size.size() > 0){
643 res.assign(result, result+sz);
646 clReleaseCommandQueue (queue);
647 clReleaseMemObject(buffer);
648 clReleaseEvent(gpuExec);
655 size_t sz = v->size();
656 cl_int typesz =
sizeof(double) * sz;
657 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
660 ROS_WARN(
"buffer_size includes more elements than needed! Using only the first...");
665 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
667 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
668 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
670 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
677 ROS_WARN(
"global_work_size includes more elements than needed! Using only the first...");
684 checkError (clEnqueueNDRangeKernel (queue,
kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
686 clWaitForEvents(1, &gpuExec);
688 double *result = (
double *) malloc(typesz);
689 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
691 if (params != NULL and params->
buffers_size.size() > 0){
695 v->assign(result, result+sz);
698 clReleaseCommandQueue (queue);
699 clReleaseMemObject(buffer);
700 clReleaseEvent(gpuExec);
705 size_t sz = v.size();
706 cl_int typesz =
sizeof(int) * sz;
707 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
710 ROS_WARN(
"buffer_size includes more elements than needed! Using only the first...");
715 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
717 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
718 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
720 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
727 ROS_WARN(
"global_work_size includes more elements than needed! Using only the first...");
734 checkError (clEnqueueNDRangeKernel (queue,
kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
736 clWaitForEvents(1, &gpuExec);
738 int *result = (
int *) malloc(typesz);
739 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
741 std::vector<int> res = std::vector<int>();
743 if (params != NULL and params->
buffers_size.size() > 0){
747 res.assign(result, result+sz);
750 clReleaseCommandQueue (queue);
751 clReleaseMemObject(buffer);
752 clReleaseEvent(gpuExec);
759 size_t sz = v->size();
760 cl_int typesz =
sizeof(int) * sz;
761 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
764 ROS_WARN(
"buffer_size includes more elements than needed! Using only the first...");
769 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
771 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
772 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
774 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
781 ROS_WARN(
"global_work_size includes more elements than needed! Using only the first...");
788 checkError (clEnqueueNDRangeKernel (queue,
kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
790 clWaitForEvents(1, &gpuExec);
792 int *result = (
int *) malloc(typesz);
793 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
795 if (params != NULL and params->
buffers_size.size() > 0){
799 v->assign(result, result+sz);
802 clReleaseCommandQueue (queue);
803 clReleaseMemObject(buffer);
804 clReleaseEvent(gpuExec);
809 size_t sz = v.size();
810 cl_int typesz =
sizeof(char) * sz;
811 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
814 ROS_WARN(
"buffer_size includes more elements than needed! Using only the first...");
819 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
821 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
822 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
824 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
831 ROS_WARN(
"global_work_size includes more elements than needed! Using only the first...");
838 checkError (clEnqueueNDRangeKernel (queue,
kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
840 clWaitForEvents(1, &gpuExec);
842 char *result = (
char *) malloc(typesz);
843 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
845 std::vector<char> res = std::vector<char>();
847 if (params != NULL and params->
buffers_size.size() > 0){
851 res.assign(result, result+sz);
854 clReleaseCommandQueue (queue);
855 clReleaseMemObject(buffer);
856 clReleaseEvent(gpuExec);
863 size_t sz = v->size();
864 cl_int typesz =
sizeof(char) * sz;
865 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
868 ROS_WARN(
"buffer_size includes more elements than needed! Using only the first...");
873 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
875 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
876 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
878 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
885 ROS_WARN(
"global_work_size includes more elements than needed! Using only the first...");
892 checkError (clEnqueueNDRangeKernel (queue,
kernel, 1, NULL, &size, NULL, 0, NULL, &gpuExec));
894 clWaitForEvents(1, &gpuExec);
896 char *result = (
char *) malloc(typesz);
897 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
899 if (params != NULL and params->
buffers_size.size() > 0){
903 v->assign(result, result+sz);
906 clReleaseCommandQueue (queue);
907 clReleaseMemObject(buffer);
908 clReleaseEvent(gpuExec);
913 size_t sz = v.size();
914 size_t sz2 = v2.size();
915 size_t typesz =
sizeof(char) * sz;
916 size_t typesz2 =
sizeof(char) * sz2;
917 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
921 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
927 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
931 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
933 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
935 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
936 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
937 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
939 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
941 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
944 size_t size[2] = {sz, sz2};
945 size_t work_dimension = 2;
948 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
951 else if(temp_sz > 0){
953 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 954 For default multidimensional global work size, leave the global_work_size vector empty, \ 955 and set multi_dimensional to true. Setting the global work size based on the values inside \ 956 the global_work_size vector.");
966 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
973 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
975 clWaitForEvents(1, &gpuExec);
977 char *result = (
char *) malloc(typesz);
978 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
980 std::vector<char> res = std::vector<char>();
981 res.assign(result, result+sz);
983 clReleaseCommandQueue (queue);
984 clReleaseMemObject(buffer);
985 clReleaseMemObject(buffer2);
986 clReleaseEvent(gpuExec);
993 size_t sz = v->size();
994 size_t sz2 = v2.size();
995 size_t typesz =
sizeof(char) * sz;
996 size_t typesz2 =
sizeof(char) * sz2;
997 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
1001 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1007 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1011 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1013 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1015 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
1016 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
1017 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
1019 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1021 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1024 size_t size[2] = {sz, sz2};
1025 size_t work_dimension = 2;
1028 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
1031 else if(temp_sz > 0){
1033 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 1034 For default multidimensional global work size, leave the global_work_size vector empty, \ 1035 and set multi_dimensional to true. Setting the global work size based on the values inside \ 1036 the global_work_size vector.");
1046 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1053 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1055 clWaitForEvents(1, &gpuExec);
1057 char *result = (
char *) malloc(typesz);
1058 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1060 v->assign(result, result+sz);
1062 clReleaseCommandQueue (queue);
1063 clReleaseMemObject(buffer);
1064 clReleaseMemObject(buffer2);
1065 clReleaseEvent(gpuExec);
1070 size_t sz = v->size();
1071 size_t sz2 = v2->size();
1072 size_t typesz =
sizeof(char) * sz;
1073 size_t typesz2 =
sizeof(char) * sz2;
1074 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
1078 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1084 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1088 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1090 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
1092 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
1093 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
1094 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
1096 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1098 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
1101 size_t size[2] = {sz, sz2};
1102 size_t work_dimension = 2;
1105 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
1108 else if(temp_sz > 0){
1110 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 1111 For default multidimensional global work size, leave the global_work_size vector empty, \ 1112 and set multi_dimensional to true. Setting the global work size based on the values inside \ 1113 the global_work_size vector.");
1123 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1130 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1132 clWaitForEvents(1, &gpuExec);
1134 char *result = (
char *) malloc(typesz);
1135 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1137 v->assign(result, result+sz);
1139 if (typesz2 != typesz or sz != sz2){
1141 result2 = (
char *) malloc(typesz2);
1142 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
1144 v2->assign(result2, result2+sz2);
1148 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
1150 v2->assign(result, result+sz2);
1153 clReleaseCommandQueue (queue);
1154 clReleaseMemObject(buffer);
1155 clReleaseMemObject(buffer2);
1156 clReleaseEvent(gpuExec);
1161 size_t sz = v.size();
1162 size_t sz2 = v2.size();
1163 size_t typesz =
sizeof(char) * sz;
1164 size_t typesz2 =
sizeof(int) * sz2;
1165 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
1169 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1175 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1179 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1181 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1183 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
1184 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
1185 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
1187 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
1189 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1192 size_t size[2] = {sz, sz2};
1193 size_t work_dimension = 2;
1196 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
1199 else if(temp_sz > 0){
1201 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 1202 For default multidimensional global work size, leave the global_work_size vector empty, \ 1203 and set multi_dimensional to true. Setting the global work size based on the values inside \ 1204 the global_work_size vector.");
1214 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1221 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1223 clWaitForEvents(1, &gpuExec);
1225 char *result = (
char *) malloc(typesz);
1226 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1228 std::vector<char> res = std::vector<char>();
1229 res.assign(result, result+sz);
1231 clReleaseCommandQueue (queue);
1232 clReleaseMemObject(buffer);
1233 clReleaseMemObject(buffer2);
1234 clReleaseEvent(gpuExec);
1241 size_t sz = v->size();
1242 size_t sz2 = v2.size();
1243 size_t typesz =
sizeof(char) * sz;
1244 size_t typesz2 =
sizeof(int) * sz2;
1245 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
1249 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1255 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1259 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1261 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1263 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
1264 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
1265 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
1267 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1269 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1272 size_t size[2] = {sz, sz2};
1273 size_t work_dimension = 2;
1276 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
1279 else if(temp_sz > 0){
1281 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 1282 For default multidimensional global work size, leave the global_work_size vector empty, \ 1283 and set multi_dimensional to true. Setting the global work size based on the values inside \ 1284 the global_work_size vector.");
1294 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1301 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1303 clWaitForEvents(1, &gpuExec);
1305 char *result = (
char *) malloc(typesz);
1306 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1308 v->assign(result, result+sz);
1310 clReleaseCommandQueue (queue);
1311 clReleaseMemObject(buffer);
1312 clReleaseMemObject(buffer2);
1313 clReleaseEvent(gpuExec);
1318 size_t sz = v->size();
1319 size_t sz2 = v2->size();
1320 size_t typesz =
sizeof(char) * sz;
1321 size_t typesz2 =
sizeof(int) * sz2;
1322 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
1326 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1332 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1336 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1338 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
1340 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
1341 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
1342 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
1344 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1346 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
1349 size_t size[2] = {sz, sz2};
1350 size_t work_dimension = 2;
1353 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
1356 else if(temp_sz > 0){
1358 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 1359 For default multidimensional global work size, leave the global_work_size vector empty, \ 1360 and set multi_dimensional to true. Setting the global work size based on the values inside \ 1361 the global_work_size vector.");
1371 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1377 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1379 clWaitForEvents(1, &gpuExec);
1381 char *result = (
char *) malloc(typesz);
1382 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1384 v->assign(result, result+sz);
1386 int *result2 = (
int *) malloc(typesz2);
1387 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
1389 v2->assign(result2, result2+sz2);
1391 clReleaseCommandQueue (queue);
1392 clReleaseMemObject(buffer);
1393 clReleaseMemObject(buffer2);
1394 clReleaseEvent(gpuExec);
1400 size_t sz = v.size();
1401 size_t sz2 = v2.size();
1402 size_t typesz =
sizeof(char) * sz;
1403 size_t typesz2 =
sizeof(float) * sz2;
1404 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
1408 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1414 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1418 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1420 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1422 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
1423 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
1424 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
1426 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
1428 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1431 size_t size[2] = {sz, sz2};
1432 size_t work_dimension = 2;
1435 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
1438 else if(temp_sz > 0){
1440 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 1441 For default multidimensional global work size, leave the global_work_size vector empty, \ 1442 and set multi_dimensional to true. Setting the global work size based on the values inside \ 1443 the global_work_size vector.");
1453 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1460 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1462 clWaitForEvents(1, &gpuExec);
1464 char *result = (
char *) malloc(typesz);
1465 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1467 std::vector<char> res = std::vector<char>();
1468 res.assign(result, result+sz);
1470 clReleaseCommandQueue (queue);
1471 clReleaseMemObject(buffer);
1472 clReleaseMemObject(buffer2);
1473 clReleaseEvent(gpuExec);
1480 size_t sz = v->size();
1481 size_t sz2 = v2.size();
1482 size_t typesz =
sizeof(char) * sz;
1483 size_t typesz2 =
sizeof(float) * sz2;
1484 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
1488 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1494 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1498 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1500 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1502 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
1503 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
1504 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
1506 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1508 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1511 size_t size[2] = {sz, sz2};
1512 size_t work_dimension = 2;
1515 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
1518 else if(temp_sz > 0){
1520 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 1521 For default multidimensional global work size, leave the global_work_size vector empty, \ 1522 and set multi_dimensional to true. Setting the global work size based on the values inside \ 1523 the global_work_size vector.");
1533 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1540 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1542 clWaitForEvents(1, &gpuExec);
1544 char *result = (
char *) malloc(typesz);
1545 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1547 v->assign(result, result+sz);
1549 clReleaseCommandQueue (queue);
1550 clReleaseMemObject(buffer);
1551 clReleaseMemObject(buffer2);
1552 clReleaseEvent(gpuExec);
1557 size_t sz = v->size();
1558 size_t sz2 = v2->size();
1559 size_t typesz =
sizeof(char) * sz;
1560 size_t typesz2 =
sizeof(float) * sz2;
1561 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
1565 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1571 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1575 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1577 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
1579 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
1580 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
1581 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
1583 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1585 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
1588 size_t size[2] = {sz, sz2};
1589 size_t work_dimension = 2;
1592 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
1595 else if(temp_sz > 0){
1597 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 1598 For default multidimensional global work size, leave the global_work_size vector empty, \ 1599 and set multi_dimensional to true. Setting the global work size based on the values inside \ 1600 the global_work_size vector.");
1610 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1616 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1618 clWaitForEvents(1, &gpuExec);
1620 char *result = (
char *) malloc(typesz);
1621 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1623 v->assign(result, result+sz);
1625 float *result2 = (
float *) malloc(typesz2);
1626 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
1628 v2->assign(result2, result2+sz2);
1630 clReleaseCommandQueue (queue);
1631 clReleaseMemObject(buffer);
1632 clReleaseMemObject(buffer2);
1633 clReleaseEvent(gpuExec);
1639 size_t sz = v.size();
1640 size_t sz2 = v2.size();
1641 size_t typesz =
sizeof(char) * sz;
1642 size_t typesz2 =
sizeof(double) * sz2;
1643 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
1647 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1653 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1657 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1659 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1661 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
1662 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
1663 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
1665 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
1667 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1670 size_t size[2] = {sz, sz2};
1671 size_t work_dimension = 2;
1674 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
1677 else if(temp_sz > 0){
1679 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 1680 For default multidimensional global work size, leave the global_work_size vector empty, \ 1681 and set multi_dimensional to true. Setting the global work size based on the values inside \ 1682 the global_work_size vector.");
1692 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1699 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1701 clWaitForEvents(1, &gpuExec);
1703 char *result = (
char *) malloc(typesz);
1704 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1706 std::vector<char> res = std::vector<char>();
1707 res.assign(result, result+sz);
1709 clReleaseCommandQueue (queue);
1710 clReleaseMemObject(buffer);
1711 clReleaseMemObject(buffer2);
1712 clReleaseEvent(gpuExec);
1719 size_t sz = v->size();
1720 size_t sz2 = v2.size();
1721 size_t typesz =
sizeof(char) * sz;
1722 size_t typesz2 =
sizeof(double) * sz2;
1723 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
1727 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1733 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1737 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1739 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1741 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
1742 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
1743 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
1745 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1747 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1750 size_t size[2] = {sz, sz2};
1751 size_t work_dimension = 2;
1754 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
1757 else if(temp_sz > 0){
1759 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 1760 For default multidimensional global work size, leave the global_work_size vector empty, \ 1761 and set multi_dimensional to true. Setting the global work size based on the values inside \ 1762 the global_work_size vector.");
1772 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1779 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1781 clWaitForEvents(1, &gpuExec);
1783 char *result = (
char *) malloc(typesz);
1784 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1786 v->assign(result, result+sz);
1788 clReleaseCommandQueue (queue);
1789 clReleaseMemObject(buffer);
1790 clReleaseMemObject(buffer2);
1791 clReleaseEvent(gpuExec);
1796 size_t sz = v->size();
1797 size_t sz2 = v2->size();
1798 size_t typesz =
sizeof(char) * sz;
1799 size_t typesz2 =
sizeof(double) * sz2;
1800 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
1804 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1810 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1814 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1816 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
1818 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
1819 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
1820 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
1822 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1824 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
1827 size_t size[2] = {sz, sz2};
1828 size_t work_dimension = 2;
1831 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
1834 else if(temp_sz > 0){
1836 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 1837 For default multidimensional global work size, leave the global_work_size vector empty, \ 1838 and set multi_dimensional to true. Setting the global work size based on the values inside \ 1839 the global_work_size vector.");
1849 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1855 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1857 clWaitForEvents(1, &gpuExec);
1859 char *result = (
char *) malloc(typesz);
1860 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1862 v->assign(result, result+sz);
1864 double *result2 = (
double *) malloc(typesz2);
1865 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
1867 v2->assign(result2, result2+sz2);
1869 clReleaseCommandQueue (queue);
1870 clReleaseMemObject(buffer);
1871 clReleaseMemObject(buffer2);
1872 clReleaseEvent(gpuExec);
1878 size_t sz = v.size();
1879 size_t sz2 = v2.size();
1880 size_t typesz =
sizeof(int) * sz;
1881 size_t typesz2 =
sizeof(char) * sz2;
1882 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
1886 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1892 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1896 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1898 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1900 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
1901 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
1902 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
1904 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
1906 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1909 size_t size[2] = {sz, sz2};
1910 size_t work_dimension = 2;
1913 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
1916 else if(temp_sz > 0){
1918 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 1919 For default multidimensional global work size, leave the global_work_size vector empty, \ 1920 and set multi_dimensional to true. Setting the global work size based on the values inside \ 1921 the global_work_size vector.");
1931 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
1938 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
1940 clWaitForEvents(1, &gpuExec);
1942 int *result = (
int *) malloc(typesz);
1943 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
1945 std::vector<int> res = std::vector<int>();
1946 res.assign(result, result+sz);
1948 clReleaseCommandQueue (queue);
1949 clReleaseMemObject(buffer);
1950 clReleaseMemObject(buffer2);
1951 clReleaseEvent(gpuExec);
1958 size_t sz = v->size();
1959 size_t sz2 = v2.size();
1960 size_t typesz =
sizeof(int) * sz;
1961 size_t typesz2 =
sizeof(char) * sz2;
1962 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
1966 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
1972 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
1976 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
1978 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
1980 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
1981 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
1982 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
1984 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
1986 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
1989 size_t size[2] = {sz, sz2};
1990 size_t work_dimension = 2;
1993 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
1996 else if(temp_sz > 0){
1998 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 1999 For default multidimensional global work size, leave the global_work_size vector empty, \ 2000 and set multi_dimensional to true. Setting the global work size based on the values inside \ 2001 the global_work_size vector.");
2011 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2018 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2020 clWaitForEvents(1, &gpuExec);
2022 int *result = (
int *) malloc(typesz);
2023 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2025 v->assign(result, result+sz);
2027 clReleaseCommandQueue (queue);
2028 clReleaseMemObject(buffer);
2029 clReleaseMemObject(buffer2);
2030 clReleaseEvent(gpuExec);
2035 size_t sz = v->size();
2036 size_t sz2 = v2->size();
2037 size_t typesz =
sizeof(int) * sz;
2038 size_t typesz2 =
sizeof(char) * sz2;
2039 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
2043 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2049 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2053 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2055 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
2057 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
2058 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
2059 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
2061 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2063 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
2066 size_t size[2] = {sz, sz2};
2067 size_t work_dimension = 2;
2070 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
2073 else if(temp_sz > 0){
2075 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 2076 For default multidimensional global work size, leave the global_work_size vector empty, \ 2077 and set multi_dimensional to true. Setting the global work size based on the values inside \ 2078 the global_work_size vector.");
2088 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2094 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2096 clWaitForEvents(1, &gpuExec);
2098 int *result = (
int *) malloc(typesz);
2099 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2101 v->assign(result, result+sz);
2103 char *result2 = (
char *) malloc(typesz2);
2104 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
2106 v2->assign(result2, result2+sz2);
2108 clReleaseCommandQueue (queue);
2109 clReleaseMemObject(buffer);
2110 clReleaseMemObject(buffer2);
2111 clReleaseEvent(gpuExec);
2117 size_t sz = v.size();
2118 size_t sz2 = v2.size();
2119 size_t typesz =
sizeof(int) * sz;
2120 size_t typesz2 =
sizeof(int) * sz2;
2121 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
2125 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2131 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2135 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2137 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
2139 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
2140 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
2141 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
2143 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
2145 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
2148 size_t size[2] = {sz, sz2};
2149 size_t work_dimension = 2;
2152 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
2155 else if(temp_sz > 0){
2157 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 2158 For default multidimensional global work size, leave the global_work_size vector empty, \ 2159 and set multi_dimensional to true. Setting the global work size based on the values inside \ 2160 the global_work_size vector.");
2170 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2177 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2179 clWaitForEvents(1, &gpuExec);
2181 int *result = (
int *) malloc(typesz);
2182 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2184 std::vector<int> res = std::vector<int>();
2185 res.assign(result, result+sz);
2187 clReleaseCommandQueue (queue);
2188 clReleaseMemObject(buffer);
2189 clReleaseMemObject(buffer2);
2190 clReleaseEvent(gpuExec);
2197 size_t sz = v->size();
2198 size_t sz2 = v2.size();
2199 size_t typesz =
sizeof(int) * sz;
2200 size_t typesz2 =
sizeof(int) * sz2;
2201 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
2205 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2211 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2215 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2217 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
2219 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
2220 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
2221 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
2223 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2225 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
2228 size_t size[2] = {sz, sz2};
2229 size_t work_dimension = 2;
2232 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
2235 else if(temp_sz > 0){
2237 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 2238 For default multidimensional global work size, leave the global_work_size vector empty, \ 2239 and set multi_dimensional to true. Setting the global work size based on the values inside \ 2240 the global_work_size vector.");
2250 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2257 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2259 clWaitForEvents(1, &gpuExec);
2261 int *result = (
int *) malloc(typesz);
2262 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2264 v->assign(result, result+sz);
2266 clReleaseCommandQueue (queue);
2267 clReleaseMemObject(buffer);
2268 clReleaseMemObject(buffer2);
2269 clReleaseEvent(gpuExec);
2274 size_t sz = v->size();
2275 size_t sz2 = v2->size();
2276 size_t typesz =
sizeof(int) * sz;
2277 size_t typesz2 =
sizeof(int) * sz2;
2278 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
2282 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2288 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2292 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2294 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
2296 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
2297 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
2298 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
2300 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2302 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
2305 size_t size[2] = {sz, sz2};
2306 size_t work_dimension = 2;
2309 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
2312 else if(temp_sz > 0){
2314 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 2315 For default multidimensional global work size, leave the global_work_size vector empty, \ 2316 and set multi_dimensional to true. Setting the global work size based on the values inside \ 2317 the global_work_size vector.");
2327 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2333 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2335 clWaitForEvents(1, &gpuExec);
2337 int *result = (
int *) malloc(typesz);
2338 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2340 v->assign(result, result+sz);
2342 if (typesz2 != typesz or sz != sz2){
2344 result2 = (
int *) malloc(typesz2);
2345 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
2347 v2->assign(result2, result2+sz2);
2351 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
2353 v2->assign(result, result+sz2);
2356 clReleaseCommandQueue (queue);
2357 clReleaseMemObject(buffer);
2358 clReleaseMemObject(buffer2);
2359 clReleaseEvent(gpuExec);
2364 size_t sz = v.size();
2365 size_t sz2 = v2.size();
2366 size_t typesz =
sizeof(int) * sz;
2367 size_t typesz2 =
sizeof(float) * sz2;
2368 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
2372 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
2378 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
2382 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
2384 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
2386 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
2387 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
2388 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
2390 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
2392 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
2395 size_t size[2] = {sz, sz2};
2396 size_t work_dimension = 2;
2399 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
2402 else if(temp_sz > 0){
2404 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 2405 For default multidimensional global work size, leave the global_work_size vector empty, \ 2406 and set multi_dimensional to true. Setting the global work size based on the values inside \ 2407 the global_work_size vector.");
2417 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
2424 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
2426 clWaitForEvents(1, &gpuExec);
2428 int *result = (
int *) malloc(typesz);
2429 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
2431 std::vector<int> res = std::vector<int>();
2432 res.assign(result, result+sz);
2434 clReleaseCommandQueue (queue);
2435 clReleaseMemObject(buffer);
2436 clReleaseMemObject(buffer2);
2437 clReleaseEvent(gpuExec);
// NOTE(review): the enclosing signature and some statements are absent from this excerpt;
// the comments below describe only the lines that are shown.
// Element counts of the two host containers (v is reached through a pointer, v2 directly).
2444 size_t sz = v->size();
2445 size_t sz2 = v2.size();
// Default byte sizes: v is sized as int elements, v2 as float elements.
2446 size_t typesz =
sizeof(int) * sz;
2447 size_t typesz2 =
sizeof(float) * sz2;
// Number of entries in params->buffers_size, or 0 when params is NULL.
2448 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warning text for more than two custom buffer sizes (guarding condition not visible here).
2452 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
// Warning text for a single custom buffer size (guarding condition not visible here).
2458 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
// Device buffer for v: CL_MEM_READ_WRITE, typesz bytes, no host pointer; status goes to error.
2462 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// Device buffer for v2: typesz2 bytes.
// NOTE(review): created CL_MEM_WRITE_ONLY, yet the visible code only uploads v2 into it and never
// reads it back; if the kernel reads argument 1, CL_MEM_READ_ONLY may be intended - confirm.
2464 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
// Bind the buffers as kernel arguments 0 and 1 (the return codes are not checked on these lines).
2466 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
2467 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
// Command queue on deviceIds[0] with a NULL property list.
2468 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the two device buffers.
2470 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2472 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
// Global work size defaults: two dimensions, sized by the element counts of v and v2.
2475 size_t size[2] = {sz, sz2};
2476 size_t work_dimension = 2;
// Taken when params is NULL, or when multi_dimensional is false and temp_sz is 0 (body not visible here).
2479 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
// Taken when temp_sz is non-zero; the warnings below refer to the global_work_size vector.
2482 else if(temp_sz > 0){
2484 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 2485 For default multidimensional global work size, leave the global_work_size vector empty, \ 2486 and set multi_dimensional to true. Setting the global work size based on the values inside \ 2487 the global_work_size vector.");
2497 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
// Launch the kernel: work_dimension dimensions, global size from size, no global offset, local size
// left to the implementation (NULL); completion is reported through gpuExec.
2504 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel launch has completed.
2506 clWaitForEvents(1, &gpuExec);
// Host staging array of typesz bytes, filled by a blocking read of buffer.
// NOTE(review): no free(result) is visible in this excerpt - confirm the allocation is released.
2508 int *result = (
int *) malloc(typesz);
2509 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz elements read back.
2511 v->assign(result, result+sz);
// Release the OpenCL objects used above.
2513 clReleaseCommandQueue (queue);
2514 clReleaseMemObject(buffer);
2515 clReleaseMemObject(buffer2);
2516 clReleaseEvent(gpuExec);
// NOTE(review): the enclosing signature and some statements are absent from this excerpt;
// the comments below describe only the lines that are shown.
// Element counts of the two host containers (both reached through pointers).
2521 size_t sz = v->size();
2522 size_t sz2 = v2->size();
// Default byte sizes: v is sized as int elements, v2 as float elements.
2523 size_t typesz =
sizeof(int) * sz;
2524 size_t typesz2 =
sizeof(float) * sz2;
// Number of entries in params->buffers_size, or 0 when params is NULL.
2525 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warning text for more than two custom buffer sizes (guarding condition not visible here).
2529 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
// Warning text for a single custom buffer size (guarding condition not visible here).
2535 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
// Device buffer for v: CL_MEM_READ_WRITE, typesz bytes, no host pointer; status goes to error.
2539 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// Device buffer for v2: CL_MEM_READ_WRITE, typesz2 bytes; it is read back into v2 further down.
2541 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
// Bind the buffers as kernel arguments 0 and 1 (the return codes are not checked on these lines).
2543 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
2544 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
// Command queue on deviceIds[0] with a NULL property list.
2545 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the two device buffers.
2547 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2549 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
// Global work size defaults: two dimensions, sized by the element counts of v and v2.
2552 size_t size[2] = {sz, sz2};
2553 size_t work_dimension = 2;
// Taken when params is NULL, or when multi_dimensional is false and temp_sz is 0 (body not visible here).
2556 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
// Taken when temp_sz is non-zero; the warnings below refer to the global_work_size vector.
2559 else if(temp_sz > 0){
2561 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 2562 For default multidimensional global work size, leave the global_work_size vector empty, \ 2563 and set multi_dimensional to true. Setting the global work size based on the values inside \ 2564 the global_work_size vector.");
2574 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
// Launch the kernel: work_dimension dimensions, global size from size, no global offset, local size
// left to the implementation (NULL); completion is reported through gpuExec.
2580 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel launch has completed.
2582 clWaitForEvents(1, &gpuExec);
// Host staging array of typesz bytes, filled by a blocking read of buffer.
// NOTE(review): no free(result) is visible in this excerpt - confirm the allocation is released.
2584 int *result = (
int *) malloc(typesz);
2585 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz elements read back.
2587 v->assign(result, result+sz);
// Same for buffer2: staging array of typesz2 bytes, blocking read, then replace the contents of *v2.
// NOTE(review): no free(result2) is visible in this excerpt - confirm the allocation is released.
2589 float *result2 = (
float *) malloc(typesz2);
2590 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
2592 v2->assign(result2, result2+sz2);
// Release the OpenCL objects used above.
2594 clReleaseCommandQueue (queue);
2595 clReleaseMemObject(buffer);
2596 clReleaseMemObject(buffer2);
2597 clReleaseEvent(gpuExec);
// NOTE(review): the enclosing signature and some statements are absent from this excerpt;
// the comments below describe only the lines that are shown.
// Element counts of the two host containers (both accessed directly).
2603 size_t sz = v.size();
2604 size_t sz2 = v2.size();
// Default byte sizes: v is sized as int elements, v2 as double elements.
2605 size_t typesz =
sizeof(int) * sz;
2606 size_t typesz2 =
sizeof(double) * sz2;
// Number of entries in params->buffers_size, or 0 when params is NULL.
2607 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warning text for more than two custom buffer sizes (guarding condition not visible here).
2611 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
// Warning text for a single custom buffer size (guarding condition not visible here).
2617 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
// Device buffer for v: CL_MEM_READ_WRITE, typesz bytes, no host pointer; status goes to error.
2621 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// Device buffer for v2: typesz2 bytes.
// NOTE(review): created CL_MEM_WRITE_ONLY, yet the visible code only uploads v2 into it and never
// reads it back; if the kernel reads argument 1, CL_MEM_READ_ONLY may be intended - confirm.
2623 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
// Bind the buffers as kernel arguments 0 and 1 (the return codes are not checked on these lines).
2625 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
2626 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
// Command queue on deviceIds[0] with a NULL property list.
2627 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the two device buffers.
2629 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
2631 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
// Global work size defaults: two dimensions, sized by the element counts of v and v2.
2634 size_t size[2] = {sz, sz2};
2635 size_t work_dimension = 2;
// Taken when params is NULL, or when multi_dimensional is false and temp_sz is 0 (body not visible here).
2638 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
// Taken when temp_sz is non-zero; the warnings below refer to the global_work_size vector.
2641 else if(temp_sz > 0){
2643 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 2644 For default multidimensional global work size, leave the global_work_size vector empty, \ 2645 and set multi_dimensional to true. Setting the global work size based on the values inside \ 2646 the global_work_size vector.");
2656 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
// Launch the kernel: work_dimension dimensions, global size from size, no global offset, local size
// left to the implementation (NULL); completion is reported through gpuExec.
2663 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel launch has completed.
2665 clWaitForEvents(1, &gpuExec);
// Host staging array of typesz bytes, filled by a blocking read of buffer.
// NOTE(review): no free(result) is visible in this excerpt - confirm the allocation is released.
2667 int *result = (
int *) malloc(typesz);
2668 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the first sz elements read back into the local vector res.
2670 std::vector<int> res = std::vector<int>();
2671 res.assign(result, result+sz);
// Release the OpenCL objects used above.
2673 clReleaseCommandQueue (queue);
2674 clReleaseMemObject(buffer);
2675 clReleaseMemObject(buffer2);
2676 clReleaseEvent(gpuExec);
// NOTE(review): the enclosing signature and some statements are absent from this excerpt;
// the comments below describe only the lines that are shown.
// Element counts of the two host containers (v is reached through a pointer, v2 directly).
2683 size_t sz = v->size();
2684 size_t sz2 = v2.size();
// Default byte sizes: v is sized as int elements, v2 as double elements.
2685 size_t typesz =
sizeof(int) * sz;
2686 size_t typesz2 =
sizeof(double) * sz2;
// Number of entries in params->buffers_size, or 0 when params is NULL.
2687 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warning text for more than two custom buffer sizes (guarding condition not visible here).
2691 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
// Warning text for a single custom buffer size (guarding condition not visible here).
2697 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
// Device buffer for v: CL_MEM_READ_WRITE, typesz bytes, no host pointer; status goes to error.
2701 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// Device buffer for v2: typesz2 bytes.
// NOTE(review): created CL_MEM_WRITE_ONLY, yet the visible code only uploads v2 into it and never
// reads it back; if the kernel reads argument 1, CL_MEM_READ_ONLY may be intended - confirm.
2703 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
// Bind the buffers as kernel arguments 0 and 1 (the return codes are not checked on these lines).
2705 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
2706 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
// Command queue on deviceIds[0] with a NULL property list.
2707 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the two device buffers.
2709 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2711 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
// Global work size defaults: two dimensions, sized by the element counts of v and v2.
2714 size_t size[2] = {sz, sz2};
2715 size_t work_dimension = 2;
// Taken when params is NULL, or when multi_dimensional is false and temp_sz is 0 (body not visible here).
2718 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
// Taken when temp_sz is non-zero; the warnings below refer to the global_work_size vector.
2721 else if(temp_sz > 0){
2723 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 2724 For default multidimensional global work size, leave the global_work_size vector empty, \ 2725 and set multi_dimensional to true. Setting the global work size based on the values inside \ 2726 the global_work_size vector.");
2736 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
// Launch the kernel: work_dimension dimensions, global size from size, no global offset, local size
// left to the implementation (NULL); completion is reported through gpuExec.
2743 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel launch has completed.
2745 clWaitForEvents(1, &gpuExec);
// Host staging array of typesz bytes, filled by a blocking read of buffer.
// NOTE(review): no free(result) is visible in this excerpt - confirm the allocation is released.
2747 int *result = (
int *) malloc(typesz);
2748 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz elements read back.
2750 v->assign(result, result+sz);
// Release the OpenCL objects used above.
2752 clReleaseCommandQueue (queue);
2753 clReleaseMemObject(buffer);
2754 clReleaseMemObject(buffer2);
2755 clReleaseEvent(gpuExec);
// NOTE(review): the enclosing signature and some statements are absent from this excerpt;
// the comments below describe only the lines that are shown.
// Element counts of the two host containers (both reached through pointers).
2760 size_t sz = v->size();
2761 size_t sz2 = v2->size();
// Default byte sizes: v is sized as int elements, v2 as double elements.
2762 size_t typesz =
sizeof(int) * sz;
2763 size_t typesz2 =
sizeof(double) * sz2;
// Number of entries in params->buffers_size, or 0 when params is NULL.
2764 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warning text for more than two custom buffer sizes (guarding condition not visible here).
2768 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
// Warning text for a single custom buffer size (guarding condition not visible here).
2774 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
// Device buffer for v: CL_MEM_READ_WRITE, typesz bytes, no host pointer; status goes to error.
2778 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// Device buffer for v2: CL_MEM_READ_WRITE, typesz2 bytes; it is read back into v2 further down.
2780 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
// Bind the buffers as kernel arguments 0 and 1 (the return codes are not checked on these lines).
2782 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
2783 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
// Command queue on deviceIds[0] with a NULL property list.
2784 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the two device buffers.
2786 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2788 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
// Global work size defaults: two dimensions, sized by the element counts of v and v2.
2791 size_t size[2] = {sz, sz2};
2792 size_t work_dimension = 2;
// Taken when params is NULL, or when multi_dimensional is false and temp_sz is 0 (body not visible here).
2795 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
// Taken when temp_sz is non-zero; the warnings below refer to the global_work_size vector.
2798 else if(temp_sz > 0){
2800 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 2801 For default multidimensional global work size, leave the global_work_size vector empty, \ 2802 and set multi_dimensional to true. Setting the global work size based on the values inside \ 2803 the global_work_size vector.");
2813 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
// Launch the kernel: work_dimension dimensions, global size from size, no global offset, local size
// left to the implementation (NULL); completion is reported through gpuExec.
2819 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel launch has completed.
2821 clWaitForEvents(1, &gpuExec);
// Host staging array of typesz bytes, filled by a blocking read of buffer.
// NOTE(review): no free(result) is visible in this excerpt - confirm the allocation is released.
2823 int *result = (
int *) malloc(typesz);
2824 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz elements read back.
2826 v->assign(result, result+sz);
// Same for buffer2: staging array of typesz2 bytes, blocking read, then replace the contents of *v2.
// NOTE(review): no free(result2) is visible in this excerpt - confirm the allocation is released.
2828 double *result2 = (
double *) malloc(typesz2);
2829 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
2831 v2->assign(result2, result2+sz2);
// Release the OpenCL objects used above.
2833 clReleaseCommandQueue (queue);
2834 clReleaseMemObject(buffer);
2835 clReleaseMemObject(buffer2);
2836 clReleaseEvent(gpuExec);
// NOTE(review): the enclosing signature and some statements are absent from this excerpt;
// the comments below describe only the lines that are shown.
// Element counts of the two host containers (both accessed directly).
2842 size_t sz = v.size();
2843 size_t sz2 = v2.size();
// Default byte sizes: v is sized as float elements, v2 as char elements.
2844 size_t typesz =
sizeof(float) * sz;
2845 size_t typesz2 =
sizeof(char) * sz2;
// Number of entries in params->buffers_size, or 0 when params is NULL.
2846 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warning text for more than two custom buffer sizes (guarding condition not visible here).
2850 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
// Warning text for a single custom buffer size (guarding condition not visible here).
2856 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
// Device buffer for v: CL_MEM_READ_WRITE, typesz bytes, no host pointer; status goes to error.
2860 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// Device buffer for v2: typesz2 bytes.
// NOTE(review): created CL_MEM_WRITE_ONLY, yet the visible code only uploads v2 into it and never
// reads it back; if the kernel reads argument 1, CL_MEM_READ_ONLY may be intended - confirm.
2862 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
// Bind the buffers as kernel arguments 0 and 1 (the return codes are not checked on these lines).
2864 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
2865 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
// Command queue on deviceIds[0] with a NULL property list.
2866 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the two device buffers.
2868 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
2870 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
// Global work size defaults: two dimensions, sized by the element counts of v and v2.
2873 size_t size[2] = {sz, sz2};
2874 size_t work_dimension = 2;
// Taken when params is NULL, or when multi_dimensional is false and temp_sz is 0 (body not visible here).
2877 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
// Taken when temp_sz is non-zero; the warnings below refer to the global_work_size vector.
2880 else if(temp_sz > 0){
2882 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 2883 For default multidimensional global work size, leave the global_work_size vector empty, \ 2884 and set multi_dimensional to true. Setting the global work size based on the values inside \ 2885 the global_work_size vector.");
2895 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
// Launch the kernel: work_dimension dimensions, global size from size, no global offset, local size
// left to the implementation (NULL); completion is reported through gpuExec.
2902 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel launch has completed.
2904 clWaitForEvents(1, &gpuExec);
// Host staging array of typesz bytes, filled by a blocking read of buffer.
// NOTE(review): no free(result) is visible in this excerpt - confirm the allocation is released.
2906 float *result = (
float *) malloc(typesz);
2907 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the first sz elements read back into the local vector res.
2909 std::vector<float> res = std::vector<float>();
2910 res.assign(result, result+sz);
// Release the OpenCL objects used above.
2912 clReleaseCommandQueue (queue);
2913 clReleaseMemObject(buffer);
2914 clReleaseMemObject(buffer2);
2915 clReleaseEvent(gpuExec);
// NOTE(review): the enclosing signature and some statements are absent from this excerpt;
// the comments below describe only the lines that are shown.
// Element counts of the two host containers (v is reached through a pointer, v2 directly).
2922 size_t sz = v->size();
2923 size_t sz2 = v2.size();
// Default byte sizes: v is sized as float elements, v2 as char elements.
2924 size_t typesz =
sizeof(float) * sz;
2925 size_t typesz2 =
sizeof(char) * sz2;
// Number of entries in params->buffers_size, or 0 when params is NULL.
2926 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warning text for more than two custom buffer sizes (guarding condition not visible here).
2930 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
// Warning text for a single custom buffer size (guarding condition not visible here).
2936 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
// Device buffer for v: CL_MEM_READ_WRITE, typesz bytes, no host pointer; status goes to error.
2940 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// Device buffer for v2: typesz2 bytes.
// NOTE(review): created CL_MEM_WRITE_ONLY, yet the visible code only uploads v2 into it and never
// reads it back; if the kernel reads argument 1, CL_MEM_READ_ONLY may be intended - confirm.
2942 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
// Bind the buffers as kernel arguments 0 and 1 (the return codes are not checked on these lines).
2944 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
2945 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
// Command queue on deviceIds[0] with a NULL property list.
2946 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the two device buffers.
2948 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
2950 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
// Global work size defaults: two dimensions, sized by the element counts of v and v2.
2953 size_t size[2] = {sz, sz2};
2954 size_t work_dimension = 2;
// Taken when params is NULL, or when multi_dimensional is false and temp_sz is 0 (body not visible here).
2957 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
// Taken when temp_sz is non-zero; the warnings below refer to the global_work_size vector.
2960 else if(temp_sz > 0){
2962 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 2963 For default multidimensional global work size, leave the global_work_size vector empty, \ 2964 and set multi_dimensional to true. Setting the global work size based on the values inside \ 2965 the global_work_size vector.");
2975 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
// Launch the kernel: work_dimension dimensions, global size from size, no global offset, local size
// left to the implementation (NULL); completion is reported through gpuExec.
2982 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel launch has completed.
2984 clWaitForEvents(1, &gpuExec);
// Host staging array of typesz bytes, filled by a blocking read of buffer.
// NOTE(review): no free(result) is visible in this excerpt - confirm the allocation is released.
2986 float *result = (
float *) malloc(typesz);
2987 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz elements read back.
2989 v->assign(result, result+sz);
// Release the OpenCL objects used above.
2991 clReleaseCommandQueue (queue);
2992 clReleaseMemObject(buffer);
2993 clReleaseMemObject(buffer2);
2994 clReleaseEvent(gpuExec);
// NOTE(review): the enclosing signature and some statements are absent from this excerpt;
// the comments below describe only the lines that are shown.
// Element counts of the two host containers (both reached through pointers).
2999 size_t sz = v->size();
3000 size_t sz2 = v2->size();
// Default byte sizes: v is sized as float elements, v2 as char elements.
3001 size_t typesz =
sizeof(float) * sz;
3002 size_t typesz2 =
sizeof(char) * sz2;
// Number of entries in params->buffers_size, or 0 when params is NULL.
3003 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warning text for more than two custom buffer sizes (guarding condition not visible here).
3007 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
// Warning text for a single custom buffer size (guarding condition not visible here).
3013 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
// Device buffer for v: CL_MEM_READ_WRITE, typesz bytes, no host pointer; status goes to error.
3017 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// Device buffer for v2: CL_MEM_READ_WRITE, typesz2 bytes; it is read back into v2 further down.
3019 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
// Bind the buffers as kernel arguments 0 and 1 (the return codes are not checked on these lines).
3021 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
3022 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
// Command queue on deviceIds[0] with a NULL property list.
3023 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the two device buffers.
3025 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3027 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
// Global work size defaults: two dimensions, sized by the element counts of v and v2.
3030 size_t size[2] = {sz, sz2};
3031 size_t work_dimension = 2;
// Taken when params is NULL, or when multi_dimensional is false and temp_sz is 0 (body not visible here).
3034 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
// Taken when temp_sz is non-zero; the warnings below refer to the global_work_size vector.
3037 else if(temp_sz > 0){
3039 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 3040 For default multidimensional global work size, leave the global_work_size vector empty, \ 3041 and set multi_dimensional to true. Setting the global work size based on the values inside \ 3042 the global_work_size vector.");
3052 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
// Launch the kernel: work_dimension dimensions, global size from size, no global offset, local size
// left to the implementation (NULL); completion is reported through gpuExec.
3058 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel launch has completed.
3060 clWaitForEvents(1, &gpuExec);
// Host staging array of typesz bytes, filled by a blocking read of buffer.
// NOTE(review): no free(result) is visible in this excerpt - confirm the allocation is released.
3062 float *result = (
float *) malloc(typesz);
3063 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz elements read back.
3065 v->assign(result, result+sz);
// Same for buffer2: staging array of typesz2 bytes, blocking read, then replace the contents of *v2.
// NOTE(review): no free(result2) is visible in this excerpt - confirm the allocation is released.
3067 char *result2 = (
char *) malloc(typesz2);
3068 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
3070 v2->assign(result2, result2+sz2);
// Release the OpenCL objects used above.
3072 clReleaseCommandQueue (queue);
3073 clReleaseMemObject(buffer);
3074 clReleaseMemObject(buffer2);
3075 clReleaseEvent(gpuExec);
// NOTE(review): the enclosing signature and some statements are absent from this excerpt;
// the comments below describe only the lines that are shown.
// Element counts of the two host containers (both accessed directly).
3081 size_t sz = v.size();
3082 size_t sz2 = v2.size();
// Default byte sizes: v is sized as float elements, v2 as int elements.
3083 size_t typesz =
sizeof(float) * sz;
3084 size_t typesz2 =
sizeof(int) * sz2;
// Number of entries in params->buffers_size, or 0 when params is NULL.
3085 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warning text for more than two custom buffer sizes (guarding condition not visible here).
3089 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
// Warning text for a single custom buffer size (guarding condition not visible here).
3095 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
// Device buffer for v: CL_MEM_READ_WRITE, typesz bytes, no host pointer; status goes to error.
3099 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// Device buffer for v2: typesz2 bytes.
// NOTE(review): created CL_MEM_WRITE_ONLY, yet the visible code only uploads v2 into it and never
// reads it back; if the kernel reads argument 1, CL_MEM_READ_ONLY may be intended - confirm.
3101 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
// Bind the buffers as kernel arguments 0 and 1 (the return codes are not checked on these lines).
3103 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
3104 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
// Command queue on deviceIds[0] with a NULL property list.
3105 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the two device buffers.
3107 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
3109 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
// Global work size defaults: two dimensions, sized by the element counts of v and v2.
3112 size_t size[2] = {sz, sz2};
3113 size_t work_dimension = 2;
// Taken when params is NULL, or when multi_dimensional is false and temp_sz is 0 (body not visible here).
3116 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
// Taken when temp_sz is non-zero; the warnings below refer to the global_work_size vector.
3119 else if(temp_sz > 0){
3121 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 3122 For default multidimensional global work size, leave the global_work_size vector empty, \ 3123 and set multi_dimensional to true. Setting the global work size based on the values inside \ 3124 the global_work_size vector.");
3134 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
// Launch the kernel: work_dimension dimensions, global size from size, no global offset, local size
// left to the implementation (NULL); completion is reported through gpuExec.
3141 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel launch has completed.
3143 clWaitForEvents(1, &gpuExec);
// Host staging array of typesz bytes, filled by a blocking read of buffer.
// NOTE(review): no free(result) is visible in this excerpt - confirm the allocation is released.
3145 float *result = (
float *) malloc(typesz);
3146 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the first sz elements read back into the local vector res.
3148 std::vector<float> res = std::vector<float>();
3149 res.assign(result, result+sz);
// Release the OpenCL objects used above.
3151 clReleaseCommandQueue (queue);
3152 clReleaseMemObject(buffer);
3153 clReleaseMemObject(buffer2);
3154 clReleaseEvent(gpuExec);
// NOTE(review): the enclosing signature and some statements are absent from this excerpt;
// the comments below describe only the lines that are shown.
// Element counts of the two host containers (v is reached through a pointer, v2 directly).
3161 size_t sz = v->size();
3162 size_t sz2 = v2.size();
// Default byte sizes: v is sized as float elements, v2 as int elements.
3163 size_t typesz =
sizeof(float) * sz;
3164 size_t typesz2 =
sizeof(int) * sz2;
// Number of entries in params->buffers_size, or 0 when params is NULL.
3165 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warning text for more than two custom buffer sizes (guarding condition not visible here).
3169 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
// Warning text for a single custom buffer size (guarding condition not visible here).
3175 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
// Device buffer for v: CL_MEM_READ_WRITE, typesz bytes, no host pointer; status goes to error.
3179 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// Device buffer for v2: typesz2 bytes.
// NOTE(review): created CL_MEM_WRITE_ONLY, yet the visible code only uploads v2 into it and never
// reads it back; if the kernel reads argument 1, CL_MEM_READ_ONLY may be intended - confirm.
3181 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
// Bind the buffers as kernel arguments 0 and 1 (the return codes are not checked on these lines).
3183 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
3184 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
// Command queue on deviceIds[0] with a NULL property list.
3185 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the two device buffers.
3187 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3189 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
// Global work size defaults: two dimensions, sized by the element counts of v and v2.
3192 size_t size[2] = {sz, sz2};
3193 size_t work_dimension = 2;
// Taken when params is NULL, or when multi_dimensional is false and temp_sz is 0 (body not visible here).
3196 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
// Taken when temp_sz is non-zero; the warnings below refer to the global_work_size vector.
3199 else if(temp_sz > 0){
3201 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 3202 For default multidimensional global work size, leave the global_work_size vector empty, \ 3203 and set multi_dimensional to true. Setting the global work size based on the values inside \ 3204 the global_work_size vector.");
3214 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
// Launch the kernel: work_dimension dimensions, global size from size, no global offset, local size
// left to the implementation (NULL); completion is reported through gpuExec.
3221 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel launch has completed.
3223 clWaitForEvents(1, &gpuExec);
// Host staging array of typesz bytes, filled by a blocking read of buffer.
// NOTE(review): no free(result) is visible in this excerpt - confirm the allocation is released.
3225 float *result = (
float *) malloc(typesz);
3226 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz elements read back.
3228 v->assign(result, result+sz);
// Release the OpenCL objects used above.
3230 clReleaseCommandQueue (queue);
3231 clReleaseMemObject(buffer);
3232 clReleaseMemObject(buffer2);
3233 clReleaseEvent(gpuExec);
// NOTE(review): the enclosing signature and some statements are absent from this excerpt;
// the comments below describe only the lines that are shown.
// Element counts of the two host containers (both reached through pointers).
3238 size_t sz = v->size();
3239 size_t sz2 = v2->size();
// Default byte sizes: v is sized as float elements, v2 as int elements.
3240 size_t typesz =
sizeof(float) * sz;
3241 size_t typesz2 =
sizeof(int) * sz2;
// Number of entries in params->buffers_size, or 0 when params is NULL.
3242 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warning text for more than two custom buffer sizes (guarding condition not visible here).
3246 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
// Warning text for a single custom buffer size (guarding condition not visible here).
3252 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
// Device buffer for v: CL_MEM_READ_WRITE, typesz bytes, no host pointer; status goes to error.
3256 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// Device buffer for v2: CL_MEM_READ_WRITE, typesz2 bytes; it is read back into v2 further down.
3258 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
// Bind the buffers as kernel arguments 0 and 1 (the return codes are not checked on these lines).
3260 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
3261 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
// Command queue on deviceIds[0] with a NULL property list.
3262 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the two device buffers.
3264 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3266 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
// Global work size defaults: two dimensions, sized by the element counts of v and v2.
3269 size_t size[2] = {sz, sz2};
3270 size_t work_dimension = 2;
// Taken when params is NULL, or when multi_dimensional is false and temp_sz is 0 (body not visible here).
3273 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
// Taken when temp_sz is non-zero; the warnings below refer to the global_work_size vector.
3276 else if(temp_sz > 0){
3278 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 3279 For default multidimensional global work size, leave the global_work_size vector empty, \ 3280 and set multi_dimensional to true. Setting the global work size based on the values inside \ 3281 the global_work_size vector.");
3291 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
// Launch the kernel: work_dimension dimensions, global size from size, no global offset, local size
// left to the implementation (NULL); completion is reported through gpuExec.
3297 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel launch has completed.
3299 clWaitForEvents(1, &gpuExec);
// Host staging array of typesz bytes, filled by a blocking read of buffer.
// NOTE(review): no free(result) is visible in this excerpt - confirm the allocation is released.
3301 float *result = (
float *) malloc(typesz);
3302 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz elements read back.
3304 v->assign(result, result+sz);
// Same for buffer2: staging array of typesz2 bytes, blocking read, then replace the contents of *v2.
// NOTE(review): no free(result2) is visible in this excerpt - confirm the allocation is released.
3306 int *result2 = (
int *) malloc(typesz2);
3307 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
3309 v2->assign(result2, result2+sz2);
// Release the OpenCL objects used above.
3311 clReleaseCommandQueue (queue);
3312 clReleaseMemObject(buffer);
3313 clReleaseMemObject(buffer2);
3314 clReleaseEvent(gpuExec);
// NOTE(review): the enclosing signature and some statements are absent from this excerpt;
// the comments below describe only the lines that are shown.
// Element counts of the two host containers (both accessed directly).
3320 size_t sz = v.size();
3321 size_t sz2 = v2.size();
// Default byte sizes: both v and v2 are sized as float elements.
3322 size_t typesz =
sizeof(float) * sz;
3323 size_t typesz2 =
sizeof(float) * sz2;
// Number of entries in params->buffers_size, or 0 when params is NULL.
3324 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warning text for more than two custom buffer sizes (guarding condition not visible here).
3328 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
// Warning text for a single custom buffer size (guarding condition not visible here).
3334 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
// Device buffer for v: CL_MEM_READ_WRITE, typesz bytes, no host pointer; status goes to error.
3338 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// Device buffer for v2: typesz2 bytes.
// NOTE(review): created CL_MEM_WRITE_ONLY, yet the visible code only uploads v2 into it and never
// reads it back; if the kernel reads argument 1, CL_MEM_READ_ONLY may be intended - confirm.
3340 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
// Bind the buffers as kernel arguments 0 and 1 (the return codes are not checked on these lines).
3342 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
3343 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
// Command queue on deviceIds[0] with a NULL property list.
3344 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the two device buffers.
3346 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
3348 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
// Global work size defaults: two dimensions, sized by the element counts of v and v2.
3351 size_t size[2] = {sz, sz2};
3352 size_t work_dimension = 2;
// Taken when params is NULL, or when multi_dimensional is false and temp_sz is 0 (body not visible here).
3355 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
// Taken when temp_sz is non-zero; the warnings below refer to the global_work_size vector.
3358 else if(temp_sz > 0){
3360 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 3361 For default multidimensional global work size, leave the global_work_size vector empty, \ 3362 and set multi_dimensional to true. Setting the global work size based on the values inside \ 3363 the global_work_size vector.");
3373 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
// Launch the kernel: work_dimension dimensions, global size from size, no global offset, local size
// left to the implementation (NULL); completion is reported through gpuExec.
3380 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel launch has completed.
3382 clWaitForEvents(1, &gpuExec);
// Host staging array of typesz bytes, filled by a blocking read of buffer.
// NOTE(review): no free(result) is visible in this excerpt - confirm the allocation is released.
3384 float *result = (
float *) malloc(typesz);
3385 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the first sz elements read back into the local vector res.
3387 std::vector<float> res = std::vector<float>();
3388 res.assign(result, result+sz);
// Release the OpenCL objects used above.
3390 clReleaseCommandQueue (queue);
3391 clReleaseMemObject(buffer);
3392 clReleaseMemObject(buffer2);
3393 clReleaseEvent(gpuExec);
// NOTE(review): the enclosing signature and some statements are absent from this excerpt;
// the comments below describe only the lines that are shown.
// Element counts of the two host containers (v is reached through a pointer, v2 directly).
3400 size_t sz = v->size();
3401 size_t sz2 = v2.size();
// Default byte sizes: both v and v2 are sized as float elements.
3402 size_t typesz =
sizeof(float) * sz;
3403 size_t typesz2 =
sizeof(float) * sz2;
// Number of entries in params->buffers_size, or 0 when params is NULL.
3404 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warning text for more than two custom buffer sizes (guarding condition not visible here).
3408 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
// Warning text for a single custom buffer size (guarding condition not visible here).
3414 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
// Device buffer for v: CL_MEM_READ_WRITE, typesz bytes, no host pointer; status goes to error.
3418 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// Device buffer for v2: typesz2 bytes.
// NOTE(review): created CL_MEM_WRITE_ONLY, yet the visible code only uploads v2 into it and never
// reads it back; if the kernel reads argument 1, CL_MEM_READ_ONLY may be intended - confirm.
3420 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
// Bind the buffers as kernel arguments 0 and 1 (the return codes are not checked on these lines).
3422 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
3423 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
// Command queue on deviceIds[0] with a NULL property list.
3424 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the two device buffers.
3426 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3428 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
// Global work size defaults: two dimensions, sized by the element counts of v and v2.
3431 size_t size[2] = {sz, sz2};
3432 size_t work_dimension = 2;
// Taken when params is NULL, or when multi_dimensional is false and temp_sz is 0 (body not visible here).
3435 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
// Taken when temp_sz is non-zero; the warnings below refer to the global_work_size vector.
3438 else if(temp_sz > 0){
3440 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 3441 For default multidimensional global work size, leave the global_work_size vector empty, \ 3442 and set multi_dimensional to true. Setting the global work size based on the values inside \ 3443 the global_work_size vector.");
3453 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
// Launch the kernel: work_dimension dimensions, global size from size, no global offset, local size
// left to the implementation (NULL); completion is reported through gpuExec.
3460 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel launch has completed.
3462 clWaitForEvents(1, &gpuExec);
// Host staging array of typesz bytes, filled by a blocking read of buffer.
// NOTE(review): no free(result) is visible in this excerpt - confirm the allocation is released.
3464 float *result = (
float *) malloc(typesz);
3465 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz elements read back.
3467 v->assign(result, result+sz);
// Release the OpenCL objects used above.
3469 clReleaseCommandQueue (queue);
3470 clReleaseMemObject(buffer);
3471 clReleaseMemObject(buffer2);
3472 clReleaseEvent(gpuExec);
// NOTE(review): the enclosing signature and some statements are absent from this excerpt;
// the comments below describe only the lines that are shown.
// Element counts of the two host containers (both reached through pointers).
3477 size_t sz = v->size();
3478 size_t sz2 = v2->size();
// Default byte sizes: both v and v2 are sized as float elements.
3479 size_t typesz =
sizeof(float) * sz;
3480 size_t typesz2 =
sizeof(float) * sz2;
// Number of entries in params->buffers_size, or 0 when params is NULL.
3481 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warning text for more than two custom buffer sizes (guarding condition not visible here).
3485 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
// Warning text for a single custom buffer size (guarding condition not visible here).
3491 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
// Device buffer for v: CL_MEM_READ_WRITE, typesz bytes, no host pointer; status goes to error.
3495 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// Device buffer for v2: CL_MEM_READ_WRITE, typesz2 bytes; it is read back into v2 further down.
3497 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
// Bind the buffers as kernel arguments 0 and 1 (the return codes are not checked on these lines).
3499 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
3500 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
// Command queue on deviceIds[0] with a NULL property list.
3501 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the two device buffers.
3503 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3505 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
// Global work size defaults: two dimensions, sized by the element counts of v and v2.
3508 size_t size[2] = {sz, sz2};
3509 size_t work_dimension = 2;
// Taken when params is NULL, or when multi_dimensional is false and temp_sz is 0 (body not visible here).
3512 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
// Taken when temp_sz is non-zero; the warnings below refer to the global_work_size vector.
3515 else if(temp_sz > 0){
3517 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 3518 For default multidimensional global work size, leave the global_work_size vector empty, \ 3519 and set multi_dimensional to true. Setting the global work size based on the values inside \ 3520 the global_work_size vector.");
3530 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
// Launch the kernel: work_dimension dimensions, global size from size, no global offset, local size
// left to the implementation (NULL); completion is reported through gpuExec.
3536 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel launch has completed.
3538 clWaitForEvents(1, &gpuExec);
// Host staging array of typesz bytes, filled by a blocking read of buffer.
// NOTE(review): no free(result) is visible in this excerpt - confirm the allocation is released.
3540 float *result = (
float *) malloc(typesz);
3541 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz elements read back.
3543 v->assign(result, result+sz);
// When the two buffers differ in byte size or element count, buffer2 is read into its own
// typesz2-byte allocation (the declaration of result2 is not visible here) and copied into *v2.
3545 if (typesz2 != typesz or sz != sz2){
3547 result2 = (
float *) malloc(typesz2);
3548 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
3550 v2->assign(result2, result2+sz2);
// Presumably the else branch (its opening line is not visible): the existing result array is
// reused as the staging area for buffer2 before copying into *v2 - confirm.
3554 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
3556 v2->assign(result, result+sz2);
// Release the OpenCL objects used above.
3559 clReleaseCommandQueue (queue);
3560 clReleaseMemObject(buffer);
3561 clReleaseMemObject(buffer2);
3562 clReleaseEvent(gpuExec);
// NOTE(review): the enclosing signature and some statements are absent from this excerpt;
// the comments below describe only the lines that are shown.
// Element counts of the two host containers (both accessed directly).
3567 size_t sz = v.size();
3568 size_t sz2 = v2.size();
// Default byte sizes: v is sized as float elements, v2 as double elements.
3569 size_t typesz =
sizeof(float) * sz;
3570 size_t typesz2 =
sizeof(double) * sz2;
// Number of entries in params->buffers_size, or 0 when params is NULL.
3571 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warning text for more than two custom buffer sizes (guarding condition not visible here).
3575 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
// Warning text for a single custom buffer size (guarding condition not visible here).
3581 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
// Device buffer for v: CL_MEM_READ_WRITE, typesz bytes, no host pointer; status goes to error.
3585 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// Device buffer for v2: typesz2 bytes.
// NOTE(review): created CL_MEM_WRITE_ONLY, yet the visible code only uploads v2 into it and never
// reads it back; if the kernel reads argument 1, CL_MEM_READ_ONLY may be intended - confirm.
3587 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
// Bind the buffers as kernel arguments 0 and 1 (the return codes are not checked on these lines).
3589 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
3590 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
// Command queue on deviceIds[0] with a NULL property list.
3591 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the two device buffers.
3593 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
3595 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
// Global work size defaults: two dimensions, sized by the element counts of v and v2.
3598 size_t size[2] = {sz, sz2};
3599 size_t work_dimension = 2;
// Taken when params is NULL, or when multi_dimensional is false and temp_sz is 0 (body not visible here).
3602 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
// Taken when temp_sz is non-zero; the warnings below refer to the global_work_size vector.
3605 else if(temp_sz > 0){
3607 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 3608 For default multidimensional global work size, leave the global_work_size vector empty, \ 3609 and set multi_dimensional to true. Setting the global work size based on the values inside \ 3610 the global_work_size vector.");
3620 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
// Launch the kernel: work_dimension dimensions, global size from size, no global offset, local size
// left to the implementation (NULL); completion is reported through gpuExec.
3627 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel launch has completed.
3629 clWaitForEvents(1, &gpuExec);
// Host staging array of typesz bytes, filled by a blocking read of buffer.
// NOTE(review): no free(result) is visible in this excerpt - confirm the allocation is released.
3631 float *result = (
float *) malloc(typesz);
3632 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the first sz elements read back into the local vector res.
3634 std::vector<float> res = std::vector<float>();
3635 res.assign(result, result+sz);
// Release the OpenCL objects used above.
3637 clReleaseCommandQueue (queue);
3638 clReleaseMemObject(buffer);
3639 clReleaseMemObject(buffer2);
3640 clReleaseEvent(gpuExec);
3647 size_t sz = v->size();
3648 size_t sz2 = v2.size();
3649 size_t typesz =
sizeof(float) * sz;
3650 size_t typesz2 =
sizeof(double) * sz2;
3651 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
3655 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3661 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3665 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3667 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
3669 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
3670 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
3671 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
3673 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3675 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
3678 size_t size[2] = {sz, sz2};
3679 size_t work_dimension = 2;
3682 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
3685 else if(temp_sz > 0){
3687 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 3688 For default multidimensional global work size, leave the global_work_size vector empty, \ 3689 and set multi_dimensional to true. Setting the global work size based on the values inside \ 3690 the global_work_size vector.");
3700 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3707 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3709 clWaitForEvents(1, &gpuExec);
3711 float *result = (
float *) malloc(typesz);
3712 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3714 v->assign(result, result+sz);
3716 clReleaseCommandQueue (queue);
3717 clReleaseMemObject(buffer);
3718 clReleaseMemObject(buffer2);
3719 clReleaseEvent(gpuExec);
3724 size_t sz = v->size();
3725 size_t sz2 = v2->size();
3726 size_t typesz =
sizeof(float) * sz;
3727 size_t typesz2 =
sizeof(double) * sz2;
3728 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
3732 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3738 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3742 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3744 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
3746 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
3747 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
3748 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
3750 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3752 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
3755 size_t size[2] = {sz, sz2};
3756 size_t work_dimension = 2;
3759 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
3762 else if(temp_sz > 0){
3764 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 3765 For default multidimensional global work size, leave the global_work_size vector empty, \ 3766 and set multi_dimensional to true. Setting the global work size based on the values inside \ 3767 the global_work_size vector.");
3777 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3783 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3785 clWaitForEvents(1, &gpuExec);
3787 float *result = (
float *) malloc(typesz);
3788 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3790 v->assign(result, result+sz);
3792 double *result2 = (
double *) malloc(typesz2);
3793 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
3795 v2->assign(result2, result2+sz2);
3797 clReleaseCommandQueue (queue);
3798 clReleaseMemObject(buffer);
3799 clReleaseMemObject(buffer2);
3800 clReleaseEvent(gpuExec);
3806 size_t sz = v.size();
3807 size_t sz2 = v2.size();
3808 size_t typesz =
sizeof(double) * sz;
3809 size_t typesz2 =
sizeof(char) * sz2;
3810 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
3814 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3820 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3824 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3826 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
3828 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
3829 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
3830 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
3832 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
3834 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
3837 size_t size[2] = {sz, sz2};
3838 size_t work_dimension = 2;
3841 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
3844 else if(temp_sz > 0){
3846 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 3847 For default multidimensional global work size, leave the global_work_size vector empty, \ 3848 and set multi_dimensional to true. Setting the global work size based on the values inside \ 3849 the global_work_size vector.");
3859 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3866 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3868 clWaitForEvents(1, &gpuExec);
3870 double *result = (
double *) malloc(typesz);
3871 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3873 std::vector<double> res = std::vector<double>();
3874 res.assign(result, result+sz);
3876 clReleaseCommandQueue (queue);
3877 clReleaseMemObject(buffer);
3878 clReleaseMemObject(buffer2);
3879 clReleaseEvent(gpuExec);
3886 size_t sz = v->size();
3887 size_t sz2 = v2.size();
3888 size_t typesz =
sizeof(double) * sz;
3889 size_t typesz2 =
sizeof(char) * sz2;
3890 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
3894 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3900 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3904 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3906 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
3908 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
3909 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
3910 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
3912 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3914 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
3917 size_t size[2] = {sz, sz2};
3918 size_t work_dimension = 2;
3921 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
3924 else if(temp_sz > 0){
3926 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 3927 For default multidimensional global work size, leave the global_work_size vector empty, \ 3928 and set multi_dimensional to true. Setting the global work size based on the values inside \ 3929 the global_work_size vector.");
3939 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
3946 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
3948 clWaitForEvents(1, &gpuExec);
3950 double *result = (
double *) malloc(typesz);
3951 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
3953 v->assign(result, result+sz);
3955 clReleaseCommandQueue (queue);
3956 clReleaseMemObject(buffer);
3957 clReleaseMemObject(buffer2);
3958 clReleaseEvent(gpuExec);
3963 size_t sz = v->size();
3964 size_t sz2 = v2->size();
3965 size_t typesz =
sizeof(double) * sz;
3966 size_t typesz2 =
sizeof(char) * sz2;
3967 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
3971 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
3977 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
3981 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
3983 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
3985 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
3986 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
3987 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
3989 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
3991 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
3994 size_t size[2] = {sz, sz2};
3995 size_t work_dimension = 2;
3998 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
4001 else if(temp_sz > 0){
4003 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 4004 For default multidimensional global work size, leave the global_work_size vector empty, \ 4005 and set multi_dimensional to true. Setting the global work size based on the values inside \ 4006 the global_work_size vector.");
4016 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4022 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4024 clWaitForEvents(1, &gpuExec);
4026 double *result = (
double *) malloc(typesz);
4027 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4029 v->assign(result, result+sz);
4031 char *result2 = (
char *) malloc(typesz2);
4032 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
4034 v2->assign(result2, result2+sz2);
4036 clReleaseCommandQueue (queue);
4037 clReleaseMemObject(buffer);
4038 clReleaseMemObject(buffer2);
4039 clReleaseEvent(gpuExec);
4045 size_t sz = v.size();
4046 size_t sz2 = v2.size();
4047 size_t typesz =
sizeof(double) * sz;
4048 size_t typesz2 =
sizeof(int) * sz2;
4049 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
4053 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4059 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4063 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4065 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4067 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
4068 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
4069 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
4071 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
4073 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4076 size_t size[2] = {sz, sz2};
4077 size_t work_dimension = 2;
4080 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
4083 else if(temp_sz > 0){
4085 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 4086 For default multidimensional global work size, leave the global_work_size vector empty, \ 4087 and set multi_dimensional to true. Setting the global work size based on the values inside \ 4088 the global_work_size vector.");
4098 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4105 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4107 clWaitForEvents(1, &gpuExec);
4109 double *result = (
double *) malloc(typesz);
4110 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4112 std::vector<double> res = std::vector<double>();
4113 res.assign(result, result+sz);
4115 clReleaseCommandQueue (queue);
4116 clReleaseMemObject(buffer);
4117 clReleaseMemObject(buffer2);
4118 clReleaseEvent(gpuExec);
4125 size_t sz = v->size();
4126 size_t sz2 = v2.size();
4127 size_t typesz =
sizeof(double) * sz;
4128 size_t typesz2 =
sizeof(int) * sz2;
4129 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
4133 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4139 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4143 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4145 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4147 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
4148 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
4149 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
4151 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4153 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4156 size_t size[2] = {sz, sz2};
4157 size_t work_dimension = 2;
4160 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
4163 else if(temp_sz > 0){
4165 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 4166 For default multidimensional global work size, leave the global_work_size vector empty, \ 4167 and set multi_dimensional to true. Setting the global work size based on the values inside \ 4168 the global_work_size vector.");
4178 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4185 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4187 clWaitForEvents(1, &gpuExec);
4189 double *result = (
double *) malloc(typesz);
4190 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4192 v->assign(result, result+sz);
4194 clReleaseCommandQueue (queue);
4195 clReleaseMemObject(buffer);
4196 clReleaseMemObject(buffer2);
4197 clReleaseEvent(gpuExec);
4202 size_t sz = v->size();
4203 size_t sz2 = v2->size();
4204 size_t typesz =
sizeof(double) * sz;
4205 size_t typesz2 =
sizeof(int) * sz2;
4206 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
4210 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4216 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4220 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4222 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
4224 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
4225 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
4226 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
4228 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4230 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
4233 size_t size[2] = {sz, sz2};
4234 size_t work_dimension = 2;
4237 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
4240 else if(temp_sz > 0){
4242 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 4243 For default multidimensional global work size, leave the global_work_size vector empty, \ 4244 and set multi_dimensional to true. Setting the global work size based on the values inside \ 4245 the global_work_size vector.");
4255 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4261 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4263 clWaitForEvents(1, &gpuExec);
4265 double *result = (
double *) malloc(typesz);
4266 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4268 v->assign(result, result+sz);
4270 int *result2 = (
int *) malloc(typesz2);
4271 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
4273 v2->assign(result2, result2+sz2);
4275 clReleaseCommandQueue (queue);
4276 clReleaseMemObject(buffer);
4277 clReleaseMemObject(buffer2);
4278 clReleaseEvent(gpuExec);
4284 size_t sz = v.size();
4285 size_t sz2 = v2.size();
4286 size_t typesz =
sizeof(double) * sz;
4287 size_t typesz2 =
sizeof(float) * sz2;
4288 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
4292 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4298 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4302 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4304 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4306 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
4307 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
4308 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
4310 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
4312 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4315 size_t size[2] = {sz, sz2};
4316 size_t work_dimension = 2;
4319 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
4322 else if(temp_sz > 0){
4324 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 4325 For default multidimensional global work size, leave the global_work_size vector empty, \ 4326 and set multi_dimensional to true. Setting the global work size based on the values inside \ 4327 the global_work_size vector.");
4337 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4344 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4346 clWaitForEvents(1, &gpuExec);
4348 double *result = (
double *) malloc(typesz);
4349 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4351 std::vector<double> res = std::vector<double>();
4352 res.assign(result, result+sz);
4354 clReleaseCommandQueue (queue);
4355 clReleaseMemObject(buffer);
4356 clReleaseMemObject(buffer2);
4357 clReleaseEvent(gpuExec);
4364 size_t sz = v->size();
4365 size_t sz2 = v2.size();
4366 size_t typesz =
sizeof(double) * sz;
4367 size_t typesz2 =
sizeof(float) * sz2;
4368 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
4372 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4378 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4382 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4384 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4386 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
4387 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
4388 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
4390 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4392 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4395 size_t size[2] = {sz, sz2};
4396 size_t work_dimension = 2;
4399 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
4402 else if(temp_sz > 0){
4404 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 4405 For default multidimensional global work size, leave the global_work_size vector empty, \ 4406 and set multi_dimensional to true. Setting the global work size based on the values inside \ 4407 the global_work_size vector.");
4417 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4424 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4426 clWaitForEvents(1, &gpuExec);
4428 double *result = (
double *) malloc(typesz);
4429 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4431 v->assign(result, result+sz);
4433 clReleaseCommandQueue (queue);
4434 clReleaseMemObject(buffer);
4435 clReleaseMemObject(buffer2);
4436 clReleaseEvent(gpuExec);
4441 size_t sz = v->size();
4442 size_t sz2 = v2->size();
4443 size_t typesz =
sizeof(double) * sz;
4444 size_t typesz2 =
sizeof(float) * sz2;
4445 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
4449 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4455 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4459 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4461 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
4463 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
4464 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
4465 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
4467 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4469 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
4472 size_t size[2] = {sz, sz2};
4473 size_t work_dimension = 2;
4476 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
4479 else if(temp_sz > 0){
4481 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 4482 For default multidimensional global work size, leave the global_work_size vector empty, \ 4483 and set multi_dimensional to true. Setting the global work size based on the values inside \ 4484 the global_work_size vector.");
4494 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4500 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4502 clWaitForEvents(1, &gpuExec);
4504 double *result = (
double *) malloc(typesz);
4505 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4507 v->assign(result, result+sz);
4509 float *result2 = (
float *) malloc(typesz2);
4510 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
4512 v2->assign(result2, result2+sz2);
4514 clReleaseCommandQueue (queue);
4515 clReleaseMemObject(buffer);
4516 clReleaseMemObject(buffer2);
4517 clReleaseEvent(gpuExec);
4523 size_t sz = v.size();
4524 size_t sz2 = v2.size();
4525 size_t typesz =
sizeof(double) * sz;
4526 size_t typesz2 =
sizeof(double) * sz2;
4527 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
4531 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4537 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4541 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4543 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4545 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
4546 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
4547 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
4549 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
4551 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4554 size_t size[2] = {sz, sz2};
4555 size_t work_dimension = 2;
4558 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
4561 else if(temp_sz > 0){
4563 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 4564 For default multidimensional global work size, leave the global_work_size vector empty, \ 4565 and set multi_dimensional to true. Setting the global work size based on the values inside \ 4566 the global_work_size vector.");
4576 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4583 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4585 clWaitForEvents(1, &gpuExec);
4587 double *result = (
double *) malloc(typesz);
4588 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4590 std::vector<double> res = std::vector<double>();
4591 res.assign(result, result+sz);
4593 clReleaseCommandQueue (queue);
4594 clReleaseMemObject(buffer);
4595 clReleaseMemObject(buffer2);
4596 clReleaseEvent(gpuExec);
4603 size_t sz = v->size();
4604 size_t sz2 = v2.size();
4605 size_t typesz =
sizeof(double) * sz;
4606 size_t typesz2 =
sizeof(double) * sz2;
4607 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
4611 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4617 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4621 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4623 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4625 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
4626 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
4627 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
4629 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4631 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4634 size_t size[2] = {sz, sz2};
4635 size_t work_dimension = 2;
4638 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
4641 else if(temp_sz > 0){
4643 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 4644 For default multidimensional global work size, leave the global_work_size vector empty, \ 4645 and set multi_dimensional to true. Setting the global work size based on the values inside \ 4646 the global_work_size vector.");
4656 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4663 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4665 clWaitForEvents(1, &gpuExec);
4667 double *result = (
double *) malloc(typesz);
4668 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4670 v->assign(result, result+sz);
4672 clReleaseCommandQueue (queue);
4673 clReleaseMemObject(buffer);
4674 clReleaseMemObject(buffer2);
4675 clReleaseEvent(gpuExec);
4680 size_t sz = v->size();
4681 size_t sz2 = v2->size();
4682 size_t typesz =
sizeof(double) * sz;
4683 size_t typesz2 =
sizeof(double) * sz2;
4684 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
4688 ROS_WARN(
"buffer_size includes more than two elements. Exactly two are needed. Using the first two...");
4694 ROS_WARN(
"buffer_size includes only one element. Exactly two are needed for custom buffer sizes. Using default values...");
4698 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4700 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
4702 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
4703 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
4704 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
4706 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4708 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
4711 size_t size[2] = {sz, sz2};
4712 size_t work_dimension = 2;
4715 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
4718 else if(temp_sz > 0){
4720 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 4721 For default multidimensional global work size, leave the global_work_size vector empty, \ 4722 and set multi_dimensional to true. Setting the global work size based on the values inside \ 4723 the global_work_size vector.");
4733 ROS_WARN(
"global_work_size includes more than two elements. A maximum of two is allowed. Using the first two...");
4739 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4741 clWaitForEvents(1, &gpuExec);
4743 double *result = (
double *) malloc(typesz);
4744 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4746 v->assign(result, result+sz);
4748 if (typesz2 != typesz or sz != sz2){
4750 result2 = (
double *) malloc(typesz2);
4751 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
4753 v2->assign(result2, result2+sz2);
4757 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
4759 v2->assign(result, result+sz2);
4762 clReleaseCommandQueue (queue);
4763 clReleaseMemObject(buffer);
4764 clReleaseMemObject(buffer2);
4765 clReleaseEvent(gpuExec);
// NOTE(review): this span is an excerpt of one function body. Its signature, braces
// and some statements are not visible here, and the leading numbers are line
// numbers carried over from the original file.
// Visible flow: derive byte counts for v, v2 and v3, create one OpenCL buffer per
// vector, bind the buffers to kernel arguments 0-2, upload the host data, launch
// the kernel, and copy the contents of `buffer` into the local vector `res`.
4772 size_t sz = v.size();
4773 size_t sz2 = v2.size();
4774 size_t sz3 = v3.size();
// Default byte counts: element count times sizeof(char) for all three vectors.
4775 size_t typesz =
sizeof(char) * sz;
4776 size_t typesz2 =
sizeof(char) * sz2;
4777 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
4778 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about an unexpected number of custom buffer sizes. The conditions that
// guard them are not visible in this excerpt.
4782 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
4789 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector.
// NOTE(review): buffer2/buffer3 are uploaded below and never read back in the
// visible code, yet they are created CL_MEM_WRITE_ONLY (the kernel may only write
// them). CL_MEM_READ_ONLY may be what was intended - confirm against the kernels.
4793 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4795 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4797 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
4799 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
4800 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
4801 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue on deviceIds[0] is created here and released at the end.
4802 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the three host vectors.
4804 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
4806 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4808 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Initial global work size: one dimension per vector length.
4811 size_t size[3] = {sz, sz2, sz3};
4812 size_t work_dimension = 3;
// Taken when there is no params object, or when params has multi_dimensional
// unset and temp_sz is 0. The branch bodies are not visible in this excerpt.
4815 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
4818 else if(temp_sz > 0){
4820 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 4821 For default multidimensional global work size, leave the global_work_size vector empty, \ 4822 and set multi_dimensional to true. Setting the global work size based on the values inside \ 4823 the global_work_size vector.");
4829 else if (temp_sz == 2){
4839 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
4846 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4848 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a temporary host array, then copy into `res`.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
4850 char *result = (
char *) malloc(typesz);
4851 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4853 std::vector<char> res = std::vector<char>();
4854 res.assign(result, result+sz);
// Release the OpenCL objects created above.
4856 clReleaseCommandQueue (queue);
4857 clReleaseMemObject(buffer);
4858 clReleaseMemObject(buffer2);
4859 clReleaseMemObject(buffer3);
4860 clReleaseEvent(gpuExec);
// NOTE(review): this span is an excerpt of one function body. Its signature, braces
// and some statements are not visible here, and the leading numbers are line
// numbers carried over from the original file.
// Visible flow: derive byte counts for *v, v2 and v3, create one OpenCL buffer per
// vector, bind the buffers to kernel arguments 0-2, upload the host data, launch
// the kernel once, and write the contents of `buffer` back into *v.
4867 size_t sz = v->size();
4868 size_t sz2 = v2.size();
4869 size_t sz3 = v3.size();
// Default byte counts: element count times sizeof(char) for all three vectors.
4870 size_t typesz =
sizeof(char) * sz;
4871 size_t typesz2 =
sizeof(char) * sz2;
4872 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
4873 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about an unexpected number of custom buffer sizes. The conditions that
// guard them are not visible in this excerpt.
4877 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
4884 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector.
// NOTE(review): buffer2/buffer3 are uploaded below and never read back in the
// visible code, yet they are created CL_MEM_WRITE_ONLY (the kernel may only write
// them). CL_MEM_READ_ONLY may be what was intended - confirm against the kernels.
4888 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4890 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
4892 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
4894 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
4895 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
4896 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue on deviceIds[0] is created here and released at the end.
4897 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the three host vectors.
4899 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4901 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
4903 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Initial global work size: one dimension per vector length.
4906 size_t size[3] = {sz, sz2, sz3};
4907 size_t work_dimension = 3;
// Taken when there is no params object, or when params has multi_dimensional
// unset and temp_sz is 0. The branch bodies are not visible in this excerpt.
4910 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
4913 else if(temp_sz > 0){
4915 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 4916 For default multidimensional global work size, leave the global_work_size vector empty, \ 4917 and set multi_dimensional to true. Setting the global work size based on the values inside \ 4918 the global_work_size vector.");
4924 else if (temp_sz == 2){
4934 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel exactly once and block until its completion event fires.
// The original enqueued and waited a second time immediately afterwards, which
// ran the kernel twice on the in/out buffer and overwrote the first event handle
// in gpuExec without releasing it. The sibling variants launch once.
4941 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
4943 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a temporary host array, then overwrite *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
4949 char *result = (
char *) malloc(typesz);
4950 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
4952 v->assign(result, result+sz);
// Release the OpenCL objects created above.
4954 clReleaseCommandQueue (queue);
4955 clReleaseMemObject(buffer);
4956 clReleaseMemObject(buffer2);
4957 clReleaseMemObject(buffer3);
4958 clReleaseEvent(gpuExec);
// NOTE(review): this span is an excerpt of one function body. Its signature, braces
// and some statements are not visible here, and the leading numbers are line
// numbers carried over from the original file.
// Visible flow: derive byte counts for *v, *v2 and v3, create one OpenCL buffer per
// vector, bind the buffers to kernel arguments 0-2, upload the host data, launch
// the kernel, and write `buffer` back into *v and `buffer2` back into *v2.
4963 size_t sz = v->size();
4964 size_t sz2 = v2->size();
4965 size_t sz3 = v3.size();
// Default byte counts: element count times sizeof(char) for all three vectors.
4966 size_t typesz =
sizeof(char) * sz;
4967 size_t typesz2 =
sizeof(char) * sz2;
4968 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
4969 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about an unexpected number of custom buffer sizes. The conditions that
// guard them are not visible in this excerpt.
4973 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
4980 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector. buffer and buffer2 are read back below.
// NOTE(review): buffer3 is uploaded below and never read back in the visible code,
// yet it is created CL_MEM_WRITE_ONLY (the kernel may only write it).
// CL_MEM_READ_ONLY may be what was intended - confirm against the kernels.
4984 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
4986 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
4988 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
4990 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
4991 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
4992 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue on deviceIds[0] is created here and released at the end.
4993 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the three host vectors.
4995 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
4997 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
4999 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Initial global work size: one dimension per vector length.
5002 size_t size[3] = {sz, sz2, sz3};
5003 size_t work_dimension = 3;
// Taken when there is no params object, or when params has multi_dimensional
// unset and temp_sz is 0. The branch bodies are not visible in this excerpt.
5006 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
5009 else if(temp_sz > 0){
5011 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 5012 For default multidimensional global work size, leave the global_work_size vector empty, \ 5013 and set multi_dimensional to true. Setting the global work size based on the values inside \ 5014 the global_work_size vector.");
5020 else if (temp_sz == 2){
5030 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
5037 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5039 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a temporary host array, then overwrite *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
5041 char *result = (
char *) malloc(typesz);
5042 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5044 v->assign(result, result+sz);
// Read-back of buffer2 into *v2: a separately sized array (result2) is used when
// v2's byte count or length differs from v's. The statements after that reuse
// `result`; presumably they sit in an else branch whose braces are not visible.
5046 if (typesz2 != typesz or sz != sz2){
5048 result2 = (
char *) malloc(typesz2);
5049 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
5051 v2->assign(result2, result2+sz2);
5055 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
5057 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
5060 clReleaseCommandQueue (queue);
5061 clReleaseMemObject(buffer);
5062 clReleaseMemObject(buffer2);
5063 clReleaseMemObject(buffer3);
5064 clReleaseEvent(gpuExec);
// NOTE(review): this span is an excerpt of one function body. Its signature, braces
// and some statements are not visible here, and the leading numbers are line
// numbers carried over from the original file.
// Visible flow: derive byte counts for *v, *v2 and *v3, create one OpenCL buffer
// per vector, bind the buffers to kernel arguments 0-2, upload the host data,
// launch the kernel, and write each buffer back into its own vector.
5069 size_t sz = v->size();
5070 size_t sz2 = v2->size();
5071 size_t sz3 = v3->size();
// Default byte counts: element count times sizeof(char) for all three vectors.
5072 size_t typesz =
sizeof(char) * sz;
5073 size_t typesz2 =
sizeof(char) * sz2;
5074 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
5075 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about an unexpected number of custom buffer sizes. The conditions that
// guard them are not visible in this excerpt.
5079 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5086 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One read/write device buffer per host vector; all three are read back below.
5090 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5092 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
5094 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
5096 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
5097 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
5098 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue on deviceIds[0] is created here and released at the end.
5099 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the three host vectors.
5101 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
5103 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
5105 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Initial global work size: one dimension per vector length.
5108 size_t size[3] = {sz, sz2, sz3};
5109 size_t work_dimension = 3;
// Taken when there is no params object, or when params has multi_dimensional
// unset and temp_sz is 0. The branch bodies are not visible in this excerpt.
5112 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
5115 else if(temp_sz > 0){
5117 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 5118 For default multidimensional global work size, leave the global_work_size vector empty, \ 5119 and set multi_dimensional to true. Setting the global work size based on the values inside \ 5120 the global_work_size vector.");
5126 else if (temp_sz == 2){
5136 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
5143 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5145 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a temporary host array, then overwrite *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
5147 char *result = (
char *) malloc(typesz);
5148 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5150 v->assign(result, result+sz);
// Read-back of buffer2 into *v2: a separately sized array (result2) is used when
// v2's byte count or length differs from v's. The statements after that reuse
// `result`; presumably they sit in an else branch whose braces are not visible.
5152 if (typesz2 != typesz or sz != sz2){
5154 result2 = (
char *) malloc(typesz2);
5155 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
5157 v2->assign(result2, result2+sz2);
5161 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
5163 v2->assign(result, result+sz2);
// Read-back of buffer3 into *v3, same scheme as for v2.
5166 if (typesz3 != typesz or sz != sz3){
5168 result3 = (
char *) malloc(typesz3);
5169 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
5171 v3->assign(result3, result3+sz3);
// Fix: the reuse path previously read buffer2 with typesz2 (copy/paste from the
// v2 section), so *v3 received v2's data and `result` could be overrun when
// typesz2 > typesz. It must read buffer3; typesz3 equals typesz on this path, so
// `result` is large enough.
5175 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
5177 v3->assign(result, result+sz3);
// Release the OpenCL objects created above.
5180 clReleaseCommandQueue (queue);
5181 clReleaseMemObject(buffer);
5182 clReleaseMemObject(buffer2);
5183 clReleaseMemObject(buffer3);
5184 clReleaseEvent(gpuExec);
// NOTE(review): this span is an excerpt of one function body. Its signature, braces
// and some statements are not visible here, and the leading numbers are line
// numbers carried over from the original file.
// Visible flow: derive byte counts for v, v2 and v3, create one OpenCL buffer per
// vector, bind the buffers to kernel arguments 0-2, upload the host data, launch
// the kernel, and copy the contents of `buffer` into the local vector `res`.
5190 size_t sz = v.size();
5191 size_t sz2 = v2.size();
5192 size_t sz3 = v3.size();
// Default byte counts: v and v2 are sized with sizeof(char), v3 with sizeof(int).
5193 size_t typesz =
sizeof(char) * sz;
5194 size_t typesz2 =
sizeof(char) * sz2;
5195 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
5196 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about an unexpected number of custom buffer sizes. The conditions that
// guard them are not visible in this excerpt.
5200 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5207 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector.
// NOTE(review): buffer2/buffer3 are uploaded below and never read back in the
// visible code, yet they are created CL_MEM_WRITE_ONLY (the kernel may only write
// them). CL_MEM_READ_ONLY may be what was intended - confirm against the kernels.
5211 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5213 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
5215 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
5217 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
5218 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
5219 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue on deviceIds[0] is created here and released at the end.
5220 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the three host vectors.
5222 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
5224 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
5226 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Initial global work size: one dimension per vector length.
5229 size_t size[3] = {sz, sz2, sz3};
5230 size_t work_dimension = 3;
// Taken when there is no params object, or when params has multi_dimensional
// unset and temp_sz is 0. The branch bodies are not visible in this excerpt.
5233 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
5236 else if(temp_sz > 0){
5238 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 5239 For default multidimensional global work size, leave the global_work_size vector empty, \ 5240 and set multi_dimensional to true. Setting the global work size based on the values inside \ 5241 the global_work_size vector.");
5247 else if (temp_sz == 2){
5257 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
5264 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5266 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a temporary host array, then copy into `res`.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
5268 char *result = (
char *) malloc(typesz);
5269 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5271 std::vector<char> res = std::vector<char>();
5272 res.assign(result, result+sz);
// Release the OpenCL objects created above.
5274 clReleaseCommandQueue (queue);
5275 clReleaseMemObject(buffer);
5276 clReleaseMemObject(buffer2);
5277 clReleaseMemObject(buffer3);
5278 clReleaseEvent(gpuExec);
// NOTE(review): this span is an excerpt of one function body. Its signature, braces
// and some statements are not visible here, and the leading numbers are line
// numbers carried over from the original file.
// Visible flow: derive byte counts for *v, v2 and v3, create one OpenCL buffer per
// vector, bind the buffers to kernel arguments 0-2, upload the host data, launch
// the kernel once, and write the contents of `buffer` back into *v.
5285 size_t sz = v->size();
5286 size_t sz2 = v2.size();
5287 size_t sz3 = v3.size();
// Default byte counts: v and v2 are sized with sizeof(char), v3 with sizeof(int).
5288 size_t typesz =
sizeof(char) * sz;
5289 size_t typesz2 =
sizeof(char) * sz2;
5290 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
5291 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about an unexpected number of custom buffer sizes. The conditions that
// guard them are not visible in this excerpt.
5295 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5302 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector.
// NOTE(review): buffer2/buffer3 are uploaded below and never read back in the
// visible code, yet they are created CL_MEM_WRITE_ONLY (the kernel may only write
// them). CL_MEM_READ_ONLY may be what was intended - confirm against the kernels.
5306 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5308 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
5310 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
5312 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
5313 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
5314 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue on deviceIds[0] is created here and released at the end.
5315 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the three host vectors.
5317 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
5319 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
5321 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Initial global work size: one dimension per vector length.
5324 size_t size[3] = {sz, sz2, sz3};
5325 size_t work_dimension = 3;
// Taken when there is no params object, or when params has multi_dimensional
// unset and temp_sz is 0. The branch bodies are not visible in this excerpt.
5328 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
5331 else if(temp_sz > 0){
5333 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 5334 For default multidimensional global work size, leave the global_work_size vector empty, \ 5335 and set multi_dimensional to true. Setting the global work size based on the values inside \ 5336 the global_work_size vector.");
5342 else if (temp_sz == 2){
5352 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel exactly once and block until its completion event fires.
// The original enqueued and waited a second time immediately afterwards, which
// ran the kernel twice on the in/out buffer and overwrote the first event handle
// in gpuExec without releasing it. The sibling variants launch once.
5359 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5361 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a temporary host array, then overwrite *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
5367 char *result = (
char *) malloc(typesz);
5368 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5370 v->assign(result, result+sz);
// Release the OpenCL objects created above.
5372 clReleaseCommandQueue (queue);
5373 clReleaseMemObject(buffer);
5374 clReleaseMemObject(buffer2);
5375 clReleaseMemObject(buffer3);
5376 clReleaseEvent(gpuExec);
// NOTE(review): this span is an excerpt of one function body. Its signature, braces
// and some statements are not visible here, and the leading numbers are line
// numbers carried over from the original file.
// Visible flow: derive byte counts for *v, *v2 and v3, create one OpenCL buffer per
// vector, bind the buffers to kernel arguments 0-2, upload the host data, launch
// the kernel, and write `buffer` back into *v and `buffer2` back into *v2.
5381 size_t sz = v->size();
5382 size_t sz2 = v2->size();
5383 size_t sz3 = v3.size();
// Default byte counts: v and v2 are sized with sizeof(char), v3 with sizeof(int).
5384 size_t typesz =
sizeof(char) * sz;
5385 size_t typesz2 =
sizeof(char) * sz2;
5386 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
5387 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about an unexpected number of custom buffer sizes. The conditions that
// guard them are not visible in this excerpt.
5391 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5398 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector. buffer and buffer2 are read back below.
// NOTE(review): buffer3 is uploaded below and never read back in the visible code,
// yet it is created CL_MEM_WRITE_ONLY (the kernel may only write it).
// CL_MEM_READ_ONLY may be what was intended - confirm against the kernels.
5402 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5404 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
5406 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
5408 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
5409 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
5410 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue on deviceIds[0] is created here and released at the end.
5411 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the three host vectors.
5413 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
5415 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
5417 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Initial global work size: one dimension per vector length.
5420 size_t size[3] = {sz, sz2, sz3};
5421 size_t work_dimension = 3;
// Taken when there is no params object, or when params has multi_dimensional
// unset and temp_sz is 0. The branch bodies are not visible in this excerpt.
5424 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
5427 else if(temp_sz > 0){
5429 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 5430 For default multidimensional global work size, leave the global_work_size vector empty, \ 5431 and set multi_dimensional to true. Setting the global work size based on the values inside \ 5432 the global_work_size vector.");
5438 else if (temp_sz == 2){
5448 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
5455 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5457 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a temporary host array, then overwrite *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
5459 char *result = (
char *) malloc(typesz);
5460 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5462 v->assign(result, result+sz);
// Read-back of buffer2 into *v2: a separately sized array (result2) is used when
// v2's byte count or length differs from v's. The statements after that reuse
// `result`; presumably they sit in an else branch whose braces are not visible.
5464 if (typesz2 != typesz or sz != sz2){
5466 result2 = (
char *) malloc(typesz2);
5467 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
5469 v2->assign(result2, result2+sz2);
5473 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
5475 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
5478 clReleaseCommandQueue (queue);
5479 clReleaseMemObject(buffer);
5480 clReleaseMemObject(buffer2);
5481 clReleaseMemObject(buffer3);
5482 clReleaseEvent(gpuExec);
// NOTE(review): this span is an excerpt of one function body. Its signature, braces
// and some statements are not visible here, and the leading numbers are line
// numbers carried over from the original file.
// Visible flow: derive byte counts for *v, *v2 and *v3, create one OpenCL buffer
// per vector, bind the buffers to kernel arguments 0-2, upload the host data,
// launch the kernel, and write each buffer back into its own vector.
5487 size_t sz = v->size();
5488 size_t sz2 = v2->size();
5489 size_t sz3 = v3->size();
// Default byte counts: v and v2 are sized with sizeof(char), v3 with sizeof(int).
5490 size_t typesz =
sizeof(char) * sz;
5491 size_t typesz2 =
sizeof(char) * sz2;
5492 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
5493 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about an unexpected number of custom buffer sizes. The conditions that
// guard them are not visible in this excerpt.
5497 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5504 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One read/write device buffer per host vector; all three are read back below.
5508 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5510 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
5512 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
5514 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
5515 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
5516 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue on deviceIds[0] is created here and released at the end.
5517 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the three host vectors.
5519 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
5521 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
5523 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Initial global work size: one dimension per vector length.
5526 size_t size[3] = {sz, sz2, sz3};
5527 size_t work_dimension = 3;
// Taken when there is no params object, or when params has multi_dimensional
// unset and temp_sz is 0. The branch bodies are not visible in this excerpt.
5530 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
5533 else if(temp_sz > 0){
5535 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 5536 For default multidimensional global work size, leave the global_work_size vector empty, \ 5537 and set multi_dimensional to true. Setting the global work size based on the values inside \ 5538 the global_work_size vector.");
5544 else if (temp_sz == 2){
5554 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
5561 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5563 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a temporary host array, then overwrite *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
5565 char *result = (
char *) malloc(typesz);
5566 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5568 v->assign(result, result+sz);
// Read-back of buffer2 into *v2: a separately sized array (result2) is used when
// v2's byte count or length differs from v's. The statements after that reuse
// `result`; presumably they sit in an else branch whose braces are not visible.
5570 if (typesz2 != typesz or sz != sz2){
5572 result2 = (
char *) malloc(typesz2);
5573 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
5575 v2->assign(result2, result2+sz2);
5579 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
5581 v2->assign(result, result+sz2);
// Read-back of buffer3 into *v3, same scheme; result3 is an int array.
5584 if (typesz3 != typesz or sz != sz3){
5586 result3 = (
int *) malloc(typesz3);
5587 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
5589 v3->assign(result3, result3+sz3);
// Fix: the reuse path previously read buffer2 with typesz2 (copy/paste from the
// v2 section), so *v3 never received buffer3's contents and `result` could be
// overrun when typesz2 > typesz. It must read buffer3; typesz3 equals typesz on
// this path, so `result` is large enough.
// The bytes are also reinterpreted as int, matching the result3 path, instead of
// widening each char to an int.
// NOTE(review): assumes v3 holds int elements, as the sizeof(int) sizing and the
// int* result3 suggest - confirm against the signature.
5593 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
5595 v3->assign(reinterpret_cast<int *>(result), reinterpret_cast<int *>(result)+sz3);
// Release the OpenCL objects created above.
5598 clReleaseCommandQueue (queue);
5599 clReleaseMemObject(buffer);
5600 clReleaseMemObject(buffer2);
5601 clReleaseMemObject(buffer3);
5602 clReleaseEvent(gpuExec);
// NOTE(review): this span is an excerpt of one function body. Its signature, braces
// and some statements are not visible here, and the leading numbers are line
// numbers carried over from the original file.
// Visible flow: derive byte counts for v, v2 and v3, create one OpenCL buffer per
// vector, bind the buffers to kernel arguments 0-2, upload the host data, launch
// the kernel, and copy the contents of `buffer` into the local vector `res`.
5608 size_t sz = v.size();
5609 size_t sz2 = v2.size();
5610 size_t sz3 = v3.size();
// Default byte counts: v and v2 are sized with sizeof(char), v3 with sizeof(float).
5611 size_t typesz =
sizeof(char) * sz;
5612 size_t typesz2 =
sizeof(char) * sz2;
5613 size_t typesz3 =
sizeof(float) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
5614 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about an unexpected number of custom buffer sizes. The conditions that
// guard them are not visible in this excerpt.
5618 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5625 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector.
// NOTE(review): buffer2/buffer3 are uploaded below and never read back in the
// visible code, yet they are created CL_MEM_WRITE_ONLY (the kernel may only write
// them). CL_MEM_READ_ONLY may be what was intended - confirm against the kernels.
5629 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5631 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
5633 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
5635 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
5636 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
5637 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue on deviceIds[0] is created here and released at the end.
5638 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the three host vectors.
5640 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
5642 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
5644 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Initial global work size: one dimension per vector length.
5647 size_t size[3] = {sz, sz2, sz3};
5648 size_t work_dimension = 3;
// Taken when there is no params object, or when params has multi_dimensional
// unset and temp_sz is 0. The branch bodies are not visible in this excerpt.
5651 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
5654 else if(temp_sz > 0){
5656 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 5657 For default multidimensional global work size, leave the global_work_size vector empty, \ 5658 and set multi_dimensional to true. Setting the global work size based on the values inside \ 5659 the global_work_size vector.");
5665 else if (temp_sz == 2){
5675 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
5682 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5684 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a temporary host array, then copy into `res`.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
5686 char *result = (
char *) malloc(typesz);
5687 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5689 std::vector<char> res = std::vector<char>();
5690 res.assign(result, result+sz);
// Release the OpenCL objects created above.
5692 clReleaseCommandQueue (queue);
5693 clReleaseMemObject(buffer);
5694 clReleaseMemObject(buffer2);
5695 clReleaseMemObject(buffer3);
5696 clReleaseEvent(gpuExec);
// NOTE(review): this span is an excerpt of one function body. Its signature, braces
// and some statements are not visible here, and the leading numbers are line
// numbers carried over from the original file.
// Visible flow: derive byte counts for *v, v2 and v3, create one OpenCL buffer per
// vector, bind the buffers to kernel arguments 0-2, upload the host data, launch
// the kernel once, and write the contents of `buffer` back into *v.
5703 size_t sz = v->size();
5704 size_t sz2 = v2.size();
5705 size_t sz3 = v3.size();
// Default byte counts: v and v2 are sized with sizeof(char), v3 with sizeof(float).
5706 size_t typesz =
sizeof(char) * sz;
5707 size_t typesz2 =
sizeof(char) * sz2;
5708 size_t typesz3 =
sizeof(float) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
5709 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about an unexpected number of custom buffer sizes. The conditions that
// guard them are not visible in this excerpt.
5713 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5720 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector.
// NOTE(review): buffer2/buffer3 are uploaded below and never read back in the
// visible code, yet they are created CL_MEM_WRITE_ONLY (the kernel may only write
// them). CL_MEM_READ_ONLY may be what was intended - confirm against the kernels.
5724 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5726 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
5728 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
5730 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
5731 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
5732 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue on deviceIds[0] is created here and released at the end.
5733 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the three host vectors.
5735 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
5737 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
5739 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Initial global work size: one dimension per vector length.
5742 size_t size[3] = {sz, sz2, sz3};
5743 size_t work_dimension = 3;
// Taken when there is no params object, or when params has multi_dimensional
// unset and temp_sz is 0. The branch bodies are not visible in this excerpt.
5746 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
5749 else if(temp_sz > 0){
5751 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 5752 For default multidimensional global work size, leave the global_work_size vector empty, \ 5753 and set multi_dimensional to true. Setting the global work size based on the values inside \ 5754 the global_work_size vector.");
5760 else if (temp_sz == 2){
5770 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel exactly once and block until its completion event fires.
// The original enqueued and waited a second time immediately afterwards, which
// ran the kernel twice on the in/out buffer and overwrote the first event handle
// in gpuExec without releasing it. The sibling variants launch once.
5777 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5779 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a temporary host array, then overwrite *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
5785 char *result = (
char *) malloc(typesz);
5786 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5788 v->assign(result, result+sz);
// Release the OpenCL objects created above.
5790 clReleaseCommandQueue (queue);
5791 clReleaseMemObject(buffer);
5792 clReleaseMemObject(buffer2);
5793 clReleaseMemObject(buffer3);
5794 clReleaseEvent(gpuExec);
// NOTE(review): this span is an excerpt of one function body. Its signature, braces
// and some statements are not visible here, and the leading numbers are line
// numbers carried over from the original file.
// Visible flow: derive byte counts for *v, *v2 and v3, create one OpenCL buffer per
// vector, bind the buffers to kernel arguments 0-2, upload the host data, launch
// the kernel, and write `buffer` back into *v and `buffer2` back into *v2.
5799 size_t sz = v->size();
5800 size_t sz2 = v2->size();
5801 size_t sz3 = v3.size();
// Default byte counts: v and v2 are sized with sizeof(char), v3 with sizeof(float).
5802 size_t typesz =
sizeof(char) * sz;
5803 size_t typesz2 =
sizeof(char) * sz2;
5804 size_t typesz3 =
sizeof(float) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
5805 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about an unexpected number of custom buffer sizes. The conditions that
// guard them are not visible in this excerpt.
5809 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5816 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector. buffer and buffer2 are read back below.
// NOTE(review): buffer3 is uploaded below and never read back in the visible code,
// yet it is created CL_MEM_WRITE_ONLY (the kernel may only write it).
// CL_MEM_READ_ONLY may be what was intended - confirm against the kernels.
5820 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5822 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
5824 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
5826 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
5827 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
5828 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue on deviceIds[0] is created here and released at the end.
5829 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the three host vectors.
5831 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
5833 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
5835 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Initial global work size: one dimension per vector length.
5838 size_t size[3] = {sz, sz2, sz3};
5839 size_t work_dimension = 3;
// Taken when there is no params object, or when params has multi_dimensional
// unset and temp_sz is 0. The branch bodies are not visible in this excerpt.
5842 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
5845 else if(temp_sz > 0){
5847 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 5848 For default multidimensional global work size, leave the global_work_size vector empty, \ 5849 and set multi_dimensional to true. Setting the global work size based on the values inside \ 5850 the global_work_size vector.");
5856 else if (temp_sz == 2){
5866 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
5873 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5875 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a temporary host array, then overwrite *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
5877 char *result = (
char *) malloc(typesz);
5878 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5880 v->assign(result, result+sz);
// Read-back of buffer2 into *v2: a separately sized array (result2) is used when
// v2's byte count or length differs from v's. The statements after that reuse
// `result`; presumably they sit in an else branch whose braces are not visible.
5882 if (typesz2 != typesz or sz != sz2){
5884 result2 = (
char *) malloc(typesz2);
5885 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
5887 v2->assign(result2, result2+sz2);
5891 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
5893 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
5896 clReleaseCommandQueue (queue);
5897 clReleaseMemObject(buffer);
5898 clReleaseMemObject(buffer2);
5899 clReleaseMemObject(buffer3);
5900 clReleaseEvent(gpuExec);
5905 size_t sz = v->size();
5906 size_t sz2 = v2->size();
5907 size_t sz3 = v3->size();
5908 size_t typesz =
sizeof(char) * sz;
5909 size_t typesz2 =
sizeof(char) * sz2;
5910 size_t typesz3 =
sizeof(float) * sz3;
5911 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
5915 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
5922 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
5926 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
5928 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
5930 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
5932 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
5933 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
5934 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
5935 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
5937 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
5939 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
5941 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
5944 size_t size[3] = {sz, sz2, sz3};
5945 size_t work_dimension = 3;
5948 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
5951 else if(temp_sz > 0){
5953 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 5954 For default multidimensional global work size, leave the global_work_size vector empty, \ 5955 and set multi_dimensional to true. Setting the global work size based on the values inside \ 5956 the global_work_size vector.");
5962 else if (temp_sz == 2){
5972 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
5979 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
5981 clWaitForEvents(1, &gpuExec);
5983 char *result = (
char *) malloc(typesz);
5984 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
5986 v->assign(result, result+sz);
5988 if (typesz2 != typesz or sz != sz2){
5990 result2 = (
char *) malloc(typesz2);
5991 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
5993 v2->assign(result2, result2+sz2);
5997 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
5999 v2->assign(result, result+sz2);
6002 if (typesz3 != typesz or sz != sz3){
6004 result3 = (
float *) malloc(typesz3);
6005 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
6007 v3->assign(result3, result3+sz3);
6011 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
6013 v3->assign(result, result+sz3);
6016 clReleaseCommandQueue (queue);
6017 clReleaseMemObject(buffer);
6018 clReleaseMemObject(buffer2);
6019 clReleaseMemObject(buffer3);
6020 clReleaseEvent(gpuExec);
6026 size_t sz = v.size();
6027 size_t sz2 = v2.size();
6028 size_t sz3 = v3.size();
6029 size_t typesz =
sizeof(char) * sz;
6030 size_t typesz2 =
sizeof(char) * sz2;
6031 size_t typesz3 =
sizeof(double) * sz3;
6032 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
6036 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6043 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6047 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6049 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
6051 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6053 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
6054 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
6055 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
6056 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
6058 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
6060 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
6062 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6065 size_t size[3] = {sz, sz2, sz3};
6066 size_t work_dimension = 3;
6069 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
6072 else if(temp_sz > 0){
6074 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 6075 For default multidimensional global work size, leave the global_work_size vector empty, \ 6076 and set multi_dimensional to true. Setting the global work size based on the values inside \ 6077 the global_work_size vector.");
6083 else if (temp_sz == 2){
6093 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6100 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6102 clWaitForEvents(1, &gpuExec);
6104 char *result = (
char *) malloc(typesz);
6105 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6107 std::vector<char> res = std::vector<char>();
6108 res.assign(result, result+sz);
6110 clReleaseCommandQueue (queue);
6111 clReleaseMemObject(buffer);
6112 clReleaseMemObject(buffer2);
6113 clReleaseMemObject(buffer3);
6114 clReleaseEvent(gpuExec);
6121 size_t sz = v->size();
6122 size_t sz2 = v2.size();
6123 size_t sz3 = v3.size();
6124 size_t typesz =
sizeof(char) * sz;
6125 size_t typesz2 =
sizeof(char) * sz2;
6126 size_t typesz3 =
sizeof(double) * sz3;
6127 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
6131 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6138 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6142 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6144 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
6146 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6148 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
6149 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
6150 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
6151 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
6153 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
6155 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
6157 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6160 size_t size[3] = {sz, sz2, sz3};
6161 size_t work_dimension = 3;
6164 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
6167 else if(temp_sz > 0){
6169 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 6170 For default multidimensional global work size, leave the global_work_size vector empty, \ 6171 and set multi_dimensional to true. Setting the global work size based on the values inside \ 6172 the global_work_size vector.");
6178 else if (temp_sz == 2){
6188 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6195 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6197 clWaitForEvents(1, &gpuExec);
6199 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6201 clWaitForEvents(1, &gpuExec);
6203 char *result = (
char *) malloc(typesz);
6204 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6206 v->assign(result, result+sz);
6208 clReleaseCommandQueue (queue);
6209 clReleaseMemObject(buffer);
6210 clReleaseMemObject(buffer2);
6211 clReleaseMemObject(buffer3);
6212 clReleaseEvent(gpuExec);
6217 size_t sz = v->size();
6218 size_t sz2 = v2->size();
6219 size_t sz3 = v3.size();
6220 size_t typesz =
sizeof(char) * sz;
6221 size_t typesz2 =
sizeof(char) * sz2;
6222 size_t typesz3 =
sizeof(double) * sz3;
6223 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
6227 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6234 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6238 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6240 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
6242 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6244 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
6245 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
6246 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
6247 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
6249 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
6251 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
6253 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6256 size_t size[3] = {sz, sz2, sz3};
6257 size_t work_dimension = 3;
6260 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
6263 else if(temp_sz > 0){
6265 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 6266 For default multidimensional global work size, leave the global_work_size vector empty, \ 6267 and set multi_dimensional to true. Setting the global work size based on the values inside \ 6268 the global_work_size vector.");
6274 else if (temp_sz == 2){
6284 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6291 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6293 clWaitForEvents(1, &gpuExec);
6295 char *result = (
char *) malloc(typesz);
6296 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6298 v->assign(result, result+sz);
6300 if (typesz2 != typesz or sz != sz2){
6302 result2 = (
char *) malloc(typesz2);
6303 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
6305 v2->assign(result2, result2+sz2);
6309 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
6311 v2->assign(result, result+sz2);
6314 clReleaseCommandQueue (queue);
6315 clReleaseMemObject(buffer);
6316 clReleaseMemObject(buffer2);
6317 clReleaseMemObject(buffer3);
6318 clReleaseEvent(gpuExec);
6323 size_t sz = v->size();
6324 size_t sz2 = v2->size();
6325 size_t sz3 = v3->size();
6326 size_t typesz =
sizeof(char) * sz;
6327 size_t typesz2 =
sizeof(char) * sz2;
6328 size_t typesz3 =
sizeof(double) * sz3;
6329 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
6333 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6340 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6344 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6346 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
6348 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
6350 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
6351 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
6352 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
6353 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
6355 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
6357 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
6359 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
6362 size_t size[3] = {sz, sz2, sz3};
6363 size_t work_dimension = 3;
6366 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
6369 else if(temp_sz > 0){
6371 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 6372 For default multidimensional global work size, leave the global_work_size vector empty, \ 6373 and set multi_dimensional to true. Setting the global work size based on the values inside \ 6374 the global_work_size vector.");
6380 else if (temp_sz == 2){
6390 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6397 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6399 clWaitForEvents(1, &gpuExec);
6401 char *result = (
char *) malloc(typesz);
6402 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6404 v->assign(result, result+sz);
6406 if (typesz2 != typesz or sz != sz2){
6408 result2 = (
char *) malloc(typesz2);
6409 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
6411 v2->assign(result2, result2+sz2);
6415 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
6417 v2->assign(result, result+sz2);
6420 if (typesz3 != typesz or sz != sz3){
6422 result3 = (
double *) malloc(typesz3);
6423 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
6425 v3->assign(result3, result3+sz3);
6429 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
6431 v3->assign(result, result+sz3);
6434 clReleaseCommandQueue (queue);
6435 clReleaseMemObject(buffer);
6436 clReleaseMemObject(buffer2);
6437 clReleaseMemObject(buffer3);
6438 clReleaseEvent(gpuExec);
6444 size_t sz = v.size();
6445 size_t sz2 = v2.size();
6446 size_t sz3 = v3.size();
6447 size_t typesz =
sizeof(char) * sz;
6448 size_t typesz2 =
sizeof(int) * sz2;
6449 size_t typesz3 =
sizeof(char) * sz3;
6450 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
6454 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6461 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6465 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6467 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
6469 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6471 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
6472 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
6473 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
6474 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
6476 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
6478 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
6480 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6483 size_t size[3] = {sz, sz2, sz3};
6484 size_t work_dimension = 3;
6487 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
6490 else if(temp_sz > 0){
6492 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 6493 For default multidimensional global work size, leave the global_work_size vector empty, \ 6494 and set multi_dimensional to true. Setting the global work size based on the values inside \ 6495 the global_work_size vector.");
6501 else if (temp_sz == 2){
6511 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6518 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6520 clWaitForEvents(1, &gpuExec);
6522 char *result = (
char *) malloc(typesz);
6523 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6525 std::vector<char> res = std::vector<char>();
6526 res.assign(result, result+sz);
6528 clReleaseCommandQueue (queue);
6529 clReleaseMemObject(buffer);
6530 clReleaseMemObject(buffer2);
6531 clReleaseMemObject(buffer3);
6532 clReleaseEvent(gpuExec);
6539 size_t sz = v->size();
6540 size_t sz2 = v2.size();
6541 size_t sz3 = v3.size();
6542 size_t typesz =
sizeof(char) * sz;
6543 size_t typesz2 =
sizeof(int) * sz2;
6544 size_t typesz3 =
sizeof(char) * sz3;
6545 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
6549 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6556 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6560 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6562 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
6564 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6566 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
6567 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
6568 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
6569 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
6571 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
6573 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
6575 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6578 size_t size[3] = {sz, sz2, sz3};
6579 size_t work_dimension = 3;
6582 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
6585 else if(temp_sz > 0){
6587 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 6588 For default multidimensional global work size, leave the global_work_size vector empty, \ 6589 and set multi_dimensional to true. Setting the global work size based on the values inside \ 6590 the global_work_size vector.");
6596 else if (temp_sz == 2){
6606 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6613 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6615 clWaitForEvents(1, &gpuExec);
6617 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6619 clWaitForEvents(1, &gpuExec);
6621 char *result = (
char *) malloc(typesz);
6622 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6624 v->assign(result, result+sz);
6626 clReleaseCommandQueue (queue);
6627 clReleaseMemObject(buffer);
6628 clReleaseMemObject(buffer2);
6629 clReleaseMemObject(buffer3);
6630 clReleaseEvent(gpuExec);
6635 size_t sz = v->size();
6636 size_t sz2 = v2->size();
6637 size_t sz3 = v3.size();
6638 size_t typesz =
sizeof(char) * sz;
6639 size_t typesz2 =
sizeof(int) * sz2;
6640 size_t typesz3 =
sizeof(char) * sz3;
6641 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
6645 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6652 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6656 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6658 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
6660 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6662 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
6663 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
6664 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
6665 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
6667 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
6669 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
6671 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6674 size_t size[3] = {sz, sz2, sz3};
6675 size_t work_dimension = 3;
6678 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
6681 else if(temp_sz > 0){
6683 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 6684 For default multidimensional global work size, leave the global_work_size vector empty, \ 6685 and set multi_dimensional to true. Setting the global work size based on the values inside \ 6686 the global_work_size vector.");
6692 else if (temp_sz == 2){
6702 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6709 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6711 clWaitForEvents(1, &gpuExec);
6713 char *result = (
char *) malloc(typesz);
6714 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6716 v->assign(result, result+sz);
6718 if (typesz2 != typesz or sz != sz2){
6720 result2 = (
int *) malloc(typesz2);
6721 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
6723 v2->assign(result2, result2+sz2);
6727 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
6729 v2->assign(result, result+sz2);
6732 clReleaseCommandQueue (queue);
6733 clReleaseMemObject(buffer);
6734 clReleaseMemObject(buffer2);
6735 clReleaseMemObject(buffer3);
6736 clReleaseEvent(gpuExec);
6741 size_t sz = v->size();
6742 size_t sz2 = v2->size();
6743 size_t sz3 = v3->size();
6744 size_t typesz =
sizeof(char) * sz;
6745 size_t typesz2 =
sizeof(int) * sz2;
6746 size_t typesz3 =
sizeof(char) * sz3;
6747 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
6751 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6758 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6762 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6764 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
6766 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
6768 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
6769 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
6770 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
6771 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
6773 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
6775 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
6777 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
6780 size_t size[3] = {sz, sz2, sz3};
6781 size_t work_dimension = 3;
6784 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
6787 else if(temp_sz > 0){
6789 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 6790 For default multidimensional global work size, leave the global_work_size vector empty, \ 6791 and set multi_dimensional to true. Setting the global work size based on the values inside \ 6792 the global_work_size vector.");
6798 else if (temp_sz == 2){
6808 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6815 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6817 clWaitForEvents(1, &gpuExec);
6819 char *result = (
char *) malloc(typesz);
6820 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6822 v->assign(result, result+sz);
6824 if (typesz2 != typesz or sz != sz2){
6826 result2 = (
int *) malloc(typesz2);
6827 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
6829 v2->assign(result2, result2+sz2);
6833 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
6835 v2->assign(result, result+sz2);
6838 if (typesz3 != typesz or sz != sz3){
6840 result3 = (
char *) malloc(typesz3);
6841 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
6843 v3->assign(result3, result3+sz3);
6847 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
6849 v3->assign(result, result+sz3);
6852 clReleaseCommandQueue (queue);
6853 clReleaseMemObject(buffer);
6854 clReleaseMemObject(buffer2);
6855 clReleaseMemObject(buffer3);
6856 clReleaseEvent(gpuExec);
6862 size_t sz = v.size();
6863 size_t sz2 = v2.size();
6864 size_t sz3 = v3.size();
6865 size_t typesz =
sizeof(char) * sz;
6866 size_t typesz2 =
sizeof(int) * sz2;
6867 size_t typesz3 =
sizeof(int) * sz3;
6868 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
6872 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6879 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6883 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6885 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
6887 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6889 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
6890 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
6891 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
6892 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
6894 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
6896 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
6898 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6901 size_t size[3] = {sz, sz2, sz3};
6902 size_t work_dimension = 3;
6905 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
6908 else if(temp_sz > 0){
6910 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 6911 For default multidimensional global work size, leave the global_work_size vector empty, \ 6912 and set multi_dimensional to true. Setting the global work size based on the values inside \ 6913 the global_work_size vector.");
6919 else if (temp_sz == 2){
6929 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
6936 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
6938 clWaitForEvents(1, &gpuExec);
6940 char *result = (
char *) malloc(typesz);
6941 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
6943 std::vector<char> res = std::vector<char>();
6944 res.assign(result, result+sz);
6946 clReleaseCommandQueue (queue);
6947 clReleaseMemObject(buffer);
6948 clReleaseMemObject(buffer2);
6949 clReleaseMemObject(buffer3);
6950 clReleaseEvent(gpuExec);
6957 size_t sz = v->size();
6958 size_t sz2 = v2.size();
6959 size_t sz3 = v3.size();
6960 size_t typesz =
sizeof(char) * sz;
6961 size_t typesz2 =
sizeof(int) * sz2;
6962 size_t typesz3 =
sizeof(int) * sz3;
6963 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
6967 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
6974 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
6978 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
6980 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
6982 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
6984 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
6985 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
6986 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
6987 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
6989 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
6991 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
6993 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
6996 size_t size[3] = {sz, sz2, sz3};
6997 size_t work_dimension = 3;
7000 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
7003 else if(temp_sz > 0){
7005 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 7006 For default multidimensional global work size, leave the global_work_size vector empty, \ 7007 and set multi_dimensional to true. Setting the global work size based on the values inside \ 7008 the global_work_size vector.");
7014 else if (temp_sz == 2){
7024 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7031 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7033 clWaitForEvents(1, &gpuExec);
7035 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7037 clWaitForEvents(1, &gpuExec);
7039 char *result = (
char *) malloc(typesz);
7040 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7042 v->assign(result, result+sz);
7044 clReleaseCommandQueue (queue);
7045 clReleaseMemObject(buffer);
7046 clReleaseMemObject(buffer2);
7047 clReleaseMemObject(buffer3);
7048 clReleaseEvent(gpuExec);
7053 size_t sz = v->size();
7054 size_t sz2 = v2->size();
7055 size_t sz3 = v3.size();
7056 size_t typesz =
sizeof(char) * sz;
7057 size_t typesz2 =
sizeof(int) * sz2;
7058 size_t typesz3 =
sizeof(int) * sz3;
7059 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
7063 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7070 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7074 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7076 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
7078 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
7080 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
7081 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
7082 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
7083 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
7085 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
7087 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
7089 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
7092 size_t size[3] = {sz, sz2, sz3};
7093 size_t work_dimension = 3;
7096 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
7099 else if(temp_sz > 0){
7101 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 7102 For default multidimensional global work size, leave the global_work_size vector empty, \ 7103 and set multi_dimensional to true. Setting the global work size based on the values inside \ 7104 the global_work_size vector.");
7110 else if (temp_sz == 2){
7120 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7127 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7129 clWaitForEvents(1, &gpuExec);
7131 char *result = (
char *) malloc(typesz);
7132 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7134 v->assign(result, result+sz);
7136 if (typesz2 != typesz or sz != sz2){
7138 result2 = (
int *) malloc(typesz2);
7139 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
7141 v2->assign(result2, result2+sz2);
7145 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
7147 v2->assign(result, result+sz2);
7150 clReleaseCommandQueue (queue);
7151 clReleaseMemObject(buffer);
7152 clReleaseMemObject(buffer2);
7153 clReleaseMemObject(buffer3);
7154 clReleaseEvent(gpuExec);
7159 size_t sz = v->size();
7160 size_t sz2 = v2->size();
7161 size_t sz3 = v3->size();
7162 size_t typesz =
sizeof(char) * sz;
7163 size_t typesz2 =
sizeof(int) * sz2;
7164 size_t typesz3 =
sizeof(int) * sz3;
7165 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
7169 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7176 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7180 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7182 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
7184 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
7186 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
7187 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
7188 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
7189 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
7191 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
7193 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
7195 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
7198 size_t size[3] = {sz, sz2, sz3};
7199 size_t work_dimension = 3;
7202 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
7205 else if(temp_sz > 0){
7207 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 7208 For default multidimensional global work size, leave the global_work_size vector empty, \ 7209 and set multi_dimensional to true. Setting the global work size based on the values inside \ 7210 the global_work_size vector.");
7216 else if (temp_sz == 2){
7226 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7233 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7235 clWaitForEvents(1, &gpuExec);
7237 char *result = (
char *) malloc(typesz);
7238 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7240 v->assign(result, result+sz);
7242 if (typesz2 != typesz or sz != sz2){
7244 result2 = (
int *) malloc(typesz2);
7245 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
7247 v2->assign(result2, result2+sz2);
7251 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
7253 v2->assign(result, result+sz2);
7256 if (typesz3 != typesz or sz != sz3){
7258 result3 = (
int *) malloc(typesz3);
7259 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
7261 v3->assign(result3, result3+sz3);
7265 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
7267 v3->assign(result, result+sz3);
7270 clReleaseCommandQueue (queue);
7271 clReleaseMemObject(buffer);
7272 clReleaseMemObject(buffer2);
7273 clReleaseMemObject(buffer3);
7274 clReleaseEvent(gpuExec);
7280 size_t sz = v.size();
7281 size_t sz2 = v2.size();
7282 size_t sz3 = v3.size();
7283 size_t typesz =
sizeof(char) * sz;
7284 size_t typesz2 =
sizeof(int) * sz2;
7285 size_t typesz3 =
sizeof(float) * sz3;
7286 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
7290 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7297 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7301 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7303 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
7305 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
7307 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
7308 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
7309 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
7310 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
7312 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
7314 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
7316 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
7319 size_t size[3] = {sz, sz2, sz3};
7320 size_t work_dimension = 3;
7323 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
7326 else if(temp_sz > 0){
7328 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 7329 For default multidimensional global work size, leave the global_work_size vector empty, \ 7330 and set multi_dimensional to true. Setting the global work size based on the values inside \ 7331 the global_work_size vector.");
7337 else if (temp_sz == 2){
7347 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7354 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7356 clWaitForEvents(1, &gpuExec);
7358 char *result = (
char *) malloc(typesz);
7359 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7361 std::vector<char> res = std::vector<char>();
7362 res.assign(result, result+sz);
7364 clReleaseCommandQueue (queue);
7365 clReleaseMemObject(buffer);
7366 clReleaseMemObject(buffer2);
7367 clReleaseMemObject(buffer3);
7368 clReleaseEvent(gpuExec);
7375 size_t sz = v->size();
7376 size_t sz2 = v2.size();
7377 size_t sz3 = v3.size();
7378 size_t typesz =
sizeof(char) * sz;
7379 size_t typesz2 =
sizeof(int) * sz2;
7380 size_t typesz3 =
sizeof(float) * sz3;
7381 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
7385 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7392 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7396 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7398 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
7400 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
7402 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
7403 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
7404 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
7405 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
7407 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
7409 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
7411 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
7414 size_t size[3] = {sz, sz2, sz3};
7415 size_t work_dimension = 3;
7418 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
7421 else if(temp_sz > 0){
7423 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 7424 For default multidimensional global work size, leave the global_work_size vector empty, \ 7425 and set multi_dimensional to true. Setting the global work size based on the values inside \ 7426 the global_work_size vector.");
7432 else if (temp_sz == 2){
7442 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7449 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7451 clWaitForEvents(1, &gpuExec);
7453 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7455 clWaitForEvents(1, &gpuExec);
7457 char *result = (
char *) malloc(typesz);
7458 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7460 v->assign(result, result+sz);
7462 clReleaseCommandQueue (queue);
7463 clReleaseMemObject(buffer);
7464 clReleaseMemObject(buffer2);
7465 clReleaseMemObject(buffer3);
7466 clReleaseEvent(gpuExec);
7471 size_t sz = v->size();
7472 size_t sz2 = v2->size();
7473 size_t sz3 = v3.size();
7474 size_t typesz =
sizeof(char) * sz;
7475 size_t typesz2 =
sizeof(int) * sz2;
7476 size_t typesz3 =
sizeof(float) * sz3;
7477 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
7481 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7488 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7492 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7494 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
7496 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
7498 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
7499 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
7500 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
7501 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
7503 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
7505 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
7507 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
7510 size_t size[3] = {sz, sz2, sz3};
7511 size_t work_dimension = 3;
7514 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
7517 else if(temp_sz > 0){
7519 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 7520 For default multidimensional global work size, leave the global_work_size vector empty, \ 7521 and set multi_dimensional to true. Setting the global work size based on the values inside \ 7522 the global_work_size vector.");
7528 else if (temp_sz == 2){
7538 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7545 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7547 clWaitForEvents(1, &gpuExec);
7549 char *result = (
char *) malloc(typesz);
7550 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7552 v->assign(result, result+sz);
7554 if (typesz2 != typesz or sz != sz2){
7556 result2 = (
int *) malloc(typesz2);
7557 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
7559 v2->assign(result2, result2+sz2);
7563 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
7565 v2->assign(result, result+sz2);
7568 clReleaseCommandQueue (queue);
7569 clReleaseMemObject(buffer);
7570 clReleaseMemObject(buffer2);
7571 clReleaseMemObject(buffer3);
7572 clReleaseEvent(gpuExec);
7577 size_t sz = v->size();
7578 size_t sz2 = v2->size();
7579 size_t sz3 = v3->size();
7580 size_t typesz =
sizeof(char) * sz;
7581 size_t typesz2 =
sizeof(int) * sz2;
7582 size_t typesz3 =
sizeof(float) * sz3;
7583 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
7587 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7594 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7598 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7600 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
7602 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
7604 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
7605 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
7606 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
7607 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
7609 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
7611 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
7613 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
7616 size_t size[3] = {sz, sz2, sz3};
7617 size_t work_dimension = 3;
7620 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
7623 else if(temp_sz > 0){
7625 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 7626 For default multidimensional global work size, leave the global_work_size vector empty, \ 7627 and set multi_dimensional to true. Setting the global work size based on the values inside \ 7628 the global_work_size vector.");
7634 else if (temp_sz == 2){
7644 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7651 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7653 clWaitForEvents(1, &gpuExec);
7655 char *result = (
char *) malloc(typesz);
7656 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7658 v->assign(result, result+sz);
7660 if (typesz2 != typesz or sz != sz2){
7662 result2 = (
int *) malloc(typesz2);
7663 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
7665 v2->assign(result2, result2+sz2);
7669 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
7671 v2->assign(result, result+sz2);
7674 if (typesz3 != typesz or sz != sz3){
7676 result3 = (
float *) malloc(typesz3);
7677 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
7679 v3->assign(result3, result3+sz3);
7683 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
7685 v3->assign(result, result+sz3);
7688 clReleaseCommandQueue (queue);
7689 clReleaseMemObject(buffer);
7690 clReleaseMemObject(buffer2);
7691 clReleaseMemObject(buffer3);
7692 clReleaseEvent(gpuExec);
7698 size_t sz = v.size();
7699 size_t sz2 = v2.size();
7700 size_t sz3 = v3.size();
7701 size_t typesz =
sizeof(char) * sz;
7702 size_t typesz2 =
sizeof(int) * sz2;
7703 size_t typesz3 =
sizeof(double) * sz3;
7704 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
7708 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7715 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7719 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7721 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
7723 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
7725 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
7726 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
7727 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
7728 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
7730 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
7732 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
7734 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
7737 size_t size[3] = {sz, sz2, sz3};
7738 size_t work_dimension = 3;
7741 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
7744 else if(temp_sz > 0){
7746 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 7747 For default multidimensional global work size, leave the global_work_size vector empty, \ 7748 and set multi_dimensional to true. Setting the global work size based on the values inside \ 7749 the global_work_size vector.");
7755 else if (temp_sz == 2){
7765 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7772 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7774 clWaitForEvents(1, &gpuExec);
7776 char *result = (
char *) malloc(typesz);
7777 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7779 std::vector<char> res = std::vector<char>();
7780 res.assign(result, result+sz);
7782 clReleaseCommandQueue (queue);
7783 clReleaseMemObject(buffer);
7784 clReleaseMemObject(buffer2);
7785 clReleaseMemObject(buffer3);
7786 clReleaseEvent(gpuExec);
7793 size_t sz = v->size();
7794 size_t sz2 = v2.size();
7795 size_t sz3 = v3.size();
7796 size_t typesz =
sizeof(char) * sz;
7797 size_t typesz2 =
sizeof(int) * sz2;
7798 size_t typesz3 =
sizeof(double) * sz3;
7799 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
7803 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7810 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7814 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7816 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
7818 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
7820 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
7821 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
7822 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
7823 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
7825 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
7827 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
7829 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
7832 size_t size[3] = {sz, sz2, sz3};
7833 size_t work_dimension = 3;
7836 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
7839 else if(temp_sz > 0){
7841 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 7842 For default multidimensional global work size, leave the global_work_size vector empty, \ 7843 and set multi_dimensional to true. Setting the global work size based on the values inside \ 7844 the global_work_size vector.");
7850 else if (temp_sz == 2){
7860 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7867 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7869 clWaitForEvents(1, &gpuExec);
7871 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7873 clWaitForEvents(1, &gpuExec);
7875 char *result = (
char *) malloc(typesz);
7876 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7878 v->assign(result, result+sz);
7880 clReleaseCommandQueue (queue);
7881 clReleaseMemObject(buffer);
7882 clReleaseMemObject(buffer2);
7883 clReleaseMemObject(buffer3);
7884 clReleaseEvent(gpuExec);
7889 size_t sz = v->size();
7890 size_t sz2 = v2->size();
7891 size_t sz3 = v3.size();
7892 size_t typesz =
sizeof(char) * sz;
7893 size_t typesz2 =
sizeof(int) * sz2;
7894 size_t typesz3 =
sizeof(double) * sz3;
7895 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
7899 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
7906 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
7910 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
7912 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
7914 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
7916 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
7917 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
7918 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
7919 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
7921 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
7923 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
7925 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
7928 size_t size[3] = {sz, sz2, sz3};
7929 size_t work_dimension = 3;
7932 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
7935 else if(temp_sz > 0){
7937 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 7938 For default multidimensional global work size, leave the global_work_size vector empty, \ 7939 and set multi_dimensional to true. Setting the global work size based on the values inside \ 7940 the global_work_size vector.");
7946 else if (temp_sz == 2){
7956 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
7963 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
7965 clWaitForEvents(1, &gpuExec);
7967 char *result = (
char *) malloc(typesz);
7968 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
7970 v->assign(result, result+sz);
7972 if (typesz2 != typesz or sz != sz2){
7974 result2 = (
int *) malloc(typesz2);
7975 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
7977 v2->assign(result2, result2+sz2);
7981 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
7983 v2->assign(result, result+sz2);
7986 clReleaseCommandQueue (queue);
7987 clReleaseMemObject(buffer);
7988 clReleaseMemObject(buffer2);
7989 clReleaseMemObject(buffer3);
7990 clReleaseEvent(gpuExec);
7995 size_t sz = v->size();
7996 size_t sz2 = v2->size();
7997 size_t sz3 = v3->size();
7998 size_t typesz =
sizeof(char) * sz;
7999 size_t typesz2 =
sizeof(int) * sz2;
8000 size_t typesz3 =
sizeof(double) * sz3;
8001 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
8005 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8012 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
8016 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8018 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
8020 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
8022 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
8023 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
8024 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
8025 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
8027 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
8029 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
8031 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
8034 size_t size[3] = {sz, sz2, sz3};
8035 size_t work_dimension = 3;
8038 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
8041 else if(temp_sz > 0){
8043 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 8044 For default multidimensional global work size, leave the global_work_size vector empty, \ 8045 and set multi_dimensional to true. Setting the global work size based on the values inside \ 8046 the global_work_size vector.");
8052 else if (temp_sz == 2){
8062 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
8069 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8071 clWaitForEvents(1, &gpuExec);
8073 char *result = (
char *) malloc(typesz);
8074 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8076 v->assign(result, result+sz);
8078 if (typesz2 != typesz or sz != sz2){
8080 result2 = (
int *) malloc(typesz2);
8081 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
8083 v2->assign(result2, result2+sz2);
8087 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8089 v2->assign(result, result+sz2);
8092 if (typesz3 != typesz or sz != sz3){
8094 result3 = (
double *) malloc(typesz3);
8095 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
8097 v3->assign(result3, result3+sz3);
8101 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8103 v3->assign(result, result+sz3);
8106 clReleaseCommandQueue (queue);
8107 clReleaseMemObject(buffer);
8108 clReleaseMemObject(buffer2);
8109 clReleaseMemObject(buffer3);
8110 clReleaseEvent(gpuExec);
8116 size_t sz = v.size();
8117 size_t sz2 = v2.size();
8118 size_t sz3 = v3.size();
8119 size_t typesz =
sizeof(char) * sz;
8120 size_t typesz2 =
sizeof(float) * sz2;
8121 size_t typesz3 =
sizeof(char) * sz3;
8122 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
8126 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8133 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
8137 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8139 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
8141 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
8143 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
8144 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
8145 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
8146 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
8148 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
8150 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
8152 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
8155 size_t size[3] = {sz, sz2, sz3};
8156 size_t work_dimension = 3;
8159 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
8162 else if(temp_sz > 0){
8164 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 8165 For default multidimensional global work size, leave the global_work_size vector empty, \ 8166 and set multi_dimensional to true. Setting the global work size based on the values inside \ 8167 the global_work_size vector.");
8173 else if (temp_sz == 2){
8183 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
8190 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8192 clWaitForEvents(1, &gpuExec);
8194 char *result = (
char *) malloc(typesz);
8195 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8197 std::vector<char> res = std::vector<char>();
8198 res.assign(result, result+sz);
8200 clReleaseCommandQueue (queue);
8201 clReleaseMemObject(buffer);
8202 clReleaseMemObject(buffer2);
8203 clReleaseMemObject(buffer3);
8204 clReleaseEvent(gpuExec);
// Excerpt of a kernel-launch routine; its signature, braces and several branch bodies
// are not visible in this view, so the notes below cover only the statements shown.
// Visible flow: size the three inputs (v, v2, v3), create one device buffer per input,
// bind them as kernel arguments 0-2, upload the host data, run the kernel, and copy
// `buffer` back into v.
// Element counts of the three inputs (v is accessed through a pointer, v2/v3 directly).
8211 size_t sz = v->size();
8212 size_t sz2 = v2.size();
8213 size_t sz3 = v3.size();
// Byte sizes; the sizeof factors suggest char, float and char elements respectively.
8214 size_t typesz =
sizeof(char) * sz;
8215 size_t typesz2 =
sizeof(float) * sz2;
8216 size_t typesz3 =
sizeof(char) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
8217 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a custom size list that is longer / shorter than three entries; the
// conditions guarding them are not visible here.
8221 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8228 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per input. NOTE(review): each call overwrites `error` and no check
// is visible between them - confirm the status is inspected.
8232 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8234 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
8236 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2; the returned status codes are discarded.
8238 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
8239 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
8240 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
8241 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device copies; their status codes are discarded.
// NOTE(review): if v is a std::vector, v->at(0) throws on empty input, and &v2[0] / &v3[0]
// on an empty vector is not valid either - confirm callers never pass empty containers.
8243 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
8245 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
8247 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per input length.
8250 size_t size[3] = {sz, sz2, sz3};
8251 size_t work_dimension = 3;
// True when params is NULL, or when neither multi_dimensional is set nor temp_sz > 0.
// NOTE(review): temp_sz was initialised from buffers_size above, while the warnings
// below talk about global_work_size - presumably it is reassigned in lines not shown; confirm.
8254 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
8257 else if(temp_sz > 0){
8259 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 8260 For default multidimensional global work size, leave the global_work_size vector empty, \ 8261 and set multi_dimensional to true. Setting the global work size based on the values inside \ 8262 the global_work_size vector.");
8268 else if (temp_sz == 2){
8278 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no explicit local work size, then wait for it.
8285 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8287 clWaitForEvents(1, &gpuExec);
// NOTE(review): the same launch and wait are repeated here. If both run unconditionally
// the kernel executes twice and the first event stored in gpuExec is overwritten without
// being released - confirm whether a guard sits in the lines not shown.
8289 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8291 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the read-back. NOTE(review): the malloc result is not checked
// and no free(result) is visible in this excerpt - confirm it is released.
8293 char *result = (
char *) malloc(typesz);
// Blocking read of `buffer` into the staging memory, then overwrite v with those sz bytes.
8294 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8296 v->assign(result, result+sz);
// Release the queue, the three buffers and the kernel event; status codes are discarded.
8298 clReleaseCommandQueue (queue);
8299 clReleaseMemObject(buffer);
8300 clReleaseMemObject(buffer2);
8301 clReleaseMemObject(buffer3);
8302 clReleaseEvent(gpuExec);
// Excerpt of a kernel-launch routine; its signature, braces and several branch bodies
// are not visible in this view, so the notes below cover only the statements shown.
// Visible flow: size the three inputs (v, v2, v3), create one device buffer per input,
// bind them as kernel arguments 0-2, upload the host data, run the kernel, and copy
// `buffer` back into v and `buffer2` back into v2.
// Element counts of the three inputs (v and v2 are accessed through pointers, v3 directly).
8307 size_t sz = v->size();
8308 size_t sz2 = v2->size();
8309 size_t sz3 = v3.size();
// Byte sizes; the sizeof factors suggest char, float and char elements respectively.
8310 size_t typesz =
sizeof(char) * sz;
8311 size_t typesz2 =
sizeof(float) * sz2;
8312 size_t typesz3 =
sizeof(char) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
8313 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a custom size list that is longer / shorter than three entries; the
// conditions guarding them are not visible here.
8317 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8324 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per input. NOTE(review): each call overwrites `error` and no check
// is visible between them - confirm the status is inspected.
8328 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8330 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
8332 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2; the returned status codes are discarded.
8334 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
8335 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
8336 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
8337 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device copies; their status codes are discarded.
// NOTE(review): if these are std::vector objects, at(0) throws on empty input and &v3[0]
// on an empty vector is not valid either - confirm callers never pass empty containers.
8339 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
8341 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
8343 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per input length.
8346 size_t size[3] = {sz, sz2, sz3};
8347 size_t work_dimension = 3;
// True when params is NULL, or when neither multi_dimensional is set nor temp_sz > 0.
// NOTE(review): temp_sz was initialised from buffers_size above, while the warnings
// below talk about global_work_size - presumably it is reassigned in lines not shown; confirm.
8350 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
8353 else if(temp_sz > 0){
8355 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 8356 For default multidimensional global work size, leave the global_work_size vector empty, \ 8357 and set multi_dimensional to true. Setting the global work size based on the values inside \ 8358 the global_work_size vector.");
8364 else if (temp_sz == 2){
8374 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no explicit local work size, then wait for it.
8381 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8383 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the read-back. NOTE(review): the malloc result is not checked
// and no free(result) is visible in this excerpt - confirm it is released.
8385 char *result = (
char *) malloc(typesz);
// Blocking read of `buffer` into the staging memory, then overwrite v with those sz bytes.
8386 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8388 v->assign(result, result+sz);
// Read-back of v2. When its byte size or length differs from v's, a dedicated float
// staging buffer is allocated (result2's declaration is not visible here).
// NOTE(review): no free(result2) is visible either - confirm it is released.
8390 if (typesz2 != typesz or sz != sz2){
8392 result2 = (
float *) malloc(typesz2);
8393 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
8395 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the else branch (the `else` line itself is not shown): buffer2
// is read into the char staging buffer and v2 is filled from char values rather than
// floats. With typesz2 = sizeof(float) * sz2 and typesz = sz, this path looks reachable
// only for empty input on typical platforms - confirm it is intended.
8399 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8401 v2->assign(result, result+sz2);
// Release the queue, the three buffers and the kernel event; status codes are discarded.
8404 clReleaseCommandQueue (queue);
8405 clReleaseMemObject(buffer);
8406 clReleaseMemObject(buffer2);
8407 clReleaseMemObject(buffer3);
8408 clReleaseEvent(gpuExec);
// Excerpt of a kernel-launch routine; its signature, braces and several branch bodies
// are not visible in this view, so the notes below cover only the statements shown.
// Visible flow: size the three inputs (v, v2, v3), create one device buffer per input,
// bind them as kernel arguments 0-2, upload the host data, run the kernel, and copy
// the device buffers back into v, v2 and v3.
// Element counts of the three inputs (all accessed through pointers).
8413 size_t sz = v->size();
8414 size_t sz2 = v2->size();
8415 size_t sz3 = v3->size();
// Byte sizes; the sizeof factors suggest char, float and char elements respectively.
8416 size_t typesz =
sizeof(char) * sz;
8417 size_t typesz2 =
sizeof(float) * sz2;
8418 size_t typesz3 =
sizeof(char) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
8419 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a custom size list that is longer / shorter than three entries; the
// conditions guarding them are not visible here.
8423 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8430 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per input. NOTE(review): each call overwrites `error` and no check
// is visible between them - confirm the status is inspected.
8434 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8436 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
8438 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2; the returned status codes are discarded.
8440 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
8441 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
8442 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
8443 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device copies; their status codes are discarded.
// NOTE(review): if these are std::vector objects, at(0) throws on empty input - confirm
// callers never pass empty containers.
8445 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
8447 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
8449 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Default global work size: one dimension per input length.
8452 size_t size[3] = {sz, sz2, sz3};
8453 size_t work_dimension = 3;
// True when params is NULL, or when neither multi_dimensional is set nor temp_sz > 0.
// NOTE(review): temp_sz was initialised from buffers_size above, while the warnings
// below talk about global_work_size - presumably it is reassigned in lines not shown; confirm.
8456 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
8459 else if(temp_sz > 0){
8461 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 8462 For default multidimensional global work size, leave the global_work_size vector empty, \ 8463 and set multi_dimensional to true. Setting the global work size based on the values inside \ 8464 the global_work_size vector.");
8470 else if (temp_sz == 2){
8480 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no explicit local work size, then wait for it.
8487 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8489 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the read-back. NOTE(review): the malloc result is not checked
// and no free(result) is visible in this excerpt - confirm it is released.
8491 char *result = (
char *) malloc(typesz);
// Blocking read of `buffer` into the staging memory, then overwrite v with those sz bytes.
8492 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8494 v->assign(result, result+sz);
// Read-back of v2. When its byte size or length differs from v's, a dedicated float
// staging buffer is allocated (result2's declaration is not visible here).
// NOTE(review): no free(result2) is visible either - confirm it is released.
8496 if (typesz2 != typesz or sz != sz2){
8498 result2 = (
float *) malloc(typesz2);
8499 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
8501 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the else branch (the `else` line itself is not shown): buffer2
// is read into the char staging buffer and v2 is filled from char values rather than
// floats. With typesz2 = sizeof(float) * sz2 and typesz = sz, this path looks reachable
// only for empty input on typical platforms - confirm it is intended.
8505 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8507 v2->assign(result, result+sz2);
// Read-back of v3, same pattern with a char staging buffer result3 (declaration and
// any free(result3) are not visible here).
8510 if (typesz3 != typesz or sz != sz3){
8512 result3 = (
char *) malloc(typesz3);
8513 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
8515 v3->assign(result3, result3+sz3);
// NOTE(review): presumably the else branch. It reads buffer2 with typesz2 again instead of
// buffer3/typesz3 before assigning to v3 - looks like a copy-paste slip. Because v and v3
// are both sized with sizeof(char) here, this path is taken whenever sz == sz3, and
// typesz2 bytes may exceed the typesz bytes allocated for `result`. Confirm and fix.
8519 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8521 v3->assign(result, result+sz3);
// Release the queue, the three buffers and the kernel event; status codes are discarded.
8524 clReleaseCommandQueue (queue);
8525 clReleaseMemObject(buffer);
8526 clReleaseMemObject(buffer2);
8527 clReleaseMemObject(buffer3);
8528 clReleaseEvent(gpuExec);
// Excerpt of a kernel-launch routine; its signature, braces and several branch bodies
// are not visible in this view, so the notes below cover only the statements shown.
// Visible flow: size the three inputs (v, v2, v3), create one device buffer per input,
// bind them as kernel arguments 0-2, upload the host data, run the kernel, and copy
// `buffer` back into a local vector `res`.
// Element counts of the three inputs (all accessed directly, not through pointers).
8534 size_t sz = v.size();
8535 size_t sz2 = v2.size();
8536 size_t sz3 = v3.size();
// Byte sizes; the sizeof factors suggest char, float and int elements respectively.
8537 size_t typesz =
sizeof(char) * sz;
8538 size_t typesz2 =
sizeof(float) * sz2;
8539 size_t typesz3 =
sizeof(int) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
8540 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a custom size list that is longer / shorter than three entries; the
// conditions guarding them are not visible here.
8544 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8551 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per input. NOTE(review): each call overwrites `error` and no check
// is visible between them - confirm the status is inspected.
8555 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8557 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
8559 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2; the returned status codes are discarded.
8561 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
8562 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
8563 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
8564 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device copies; their status codes are discarded.
// NOTE(review): if these are std::vector objects, &x[0] on an empty vector is not valid -
// confirm callers never pass empty containers.
8566 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
8568 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
8570 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per input length.
8573 size_t size[3] = {sz, sz2, sz3};
8574 size_t work_dimension = 3;
// True when params is NULL, or when neither multi_dimensional is set nor temp_sz > 0.
// NOTE(review): temp_sz was initialised from buffers_size above, while the warnings
// below talk about global_work_size - presumably it is reassigned in lines not shown; confirm.
8577 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
8580 else if(temp_sz > 0){
8582 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 8583 For default multidimensional global work size, leave the global_work_size vector empty, \ 8584 and set multi_dimensional to true. Setting the global work size based on the values inside \ 8585 the global_work_size vector.");
8591 else if (temp_sz == 2){
8601 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no explicit local work size, then wait for it.
8608 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8610 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the read-back. NOTE(review): the malloc result is not checked
// and no free(result) is visible in this excerpt - confirm it is released.
8612 char *result = (
char *) malloc(typesz);
// Blocking read of `buffer` into the staging memory.
8613 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the sz result bytes into a local vector; what happens to `res` afterwards
// (presumably it is returned) is not visible here.
8615 std::vector<char> res = std::vector<char>();
8616 res.assign(result, result+sz);
// Release the queue, the three buffers and the kernel event; status codes are discarded.
8618 clReleaseCommandQueue (queue);
8619 clReleaseMemObject(buffer);
8620 clReleaseMemObject(buffer2);
8621 clReleaseMemObject(buffer3);
8622 clReleaseEvent(gpuExec);
// Excerpt of a kernel-launch routine; its signature, braces and several branch bodies
// are not visible in this view, so the notes below cover only the statements shown.
// Visible flow: size the three inputs (v, v2, v3), create one device buffer per input,
// bind them as kernel arguments 0-2, upload the host data, run the kernel, and copy
// `buffer` back into v.
// Element counts of the three inputs (v is accessed through a pointer, v2/v3 directly).
8629 size_t sz = v->size();
8630 size_t sz2 = v2.size();
8631 size_t sz3 = v3.size();
// Byte sizes; the sizeof factors suggest char, float and int elements respectively.
8632 size_t typesz =
sizeof(char) * sz;
8633 size_t typesz2 =
sizeof(float) * sz2;
8634 size_t typesz3 =
sizeof(int) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
8635 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a custom size list that is longer / shorter than three entries; the
// conditions guarding them are not visible here.
8639 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8646 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per input. NOTE(review): each call overwrites `error` and no check
// is visible between them - confirm the status is inspected.
8650 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8652 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
8654 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2; the returned status codes are discarded.
8656 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
8657 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
8658 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
8659 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device copies; their status codes are discarded.
// NOTE(review): if v is a std::vector, v->at(0) throws on empty input, and &v2[0] / &v3[0]
// on an empty vector is not valid either - confirm callers never pass empty containers.
8661 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
8663 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
8665 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per input length.
8668 size_t size[3] = {sz, sz2, sz3};
8669 size_t work_dimension = 3;
// True when params is NULL, or when neither multi_dimensional is set nor temp_sz > 0.
// NOTE(review): temp_sz was initialised from buffers_size above, while the warnings
// below talk about global_work_size - presumably it is reassigned in lines not shown; confirm.
8672 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
8675 else if(temp_sz > 0){
8677 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 8678 For default multidimensional global work size, leave the global_work_size vector empty, \ 8679 and set multi_dimensional to true. Setting the global work size based on the values inside \ 8680 the global_work_size vector.");
8686 else if (temp_sz == 2){
8696 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no explicit local work size, then wait for it.
8703 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8705 clWaitForEvents(1, &gpuExec);
// NOTE(review): the same launch and wait are repeated here. If both run unconditionally
// the kernel executes twice and the first event stored in gpuExec is overwritten without
// being released - confirm whether a guard sits in the lines not shown.
8707 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8709 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the read-back. NOTE(review): the malloc result is not checked
// and no free(result) is visible in this excerpt - confirm it is released.
8711 char *result = (
char *) malloc(typesz);
// Blocking read of `buffer` into the staging memory, then overwrite v with those sz bytes.
8712 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8714 v->assign(result, result+sz);
// Release the queue, the three buffers and the kernel event; status codes are discarded.
8716 clReleaseCommandQueue (queue);
8717 clReleaseMemObject(buffer);
8718 clReleaseMemObject(buffer2);
8719 clReleaseMemObject(buffer3);
8720 clReleaseEvent(gpuExec);
// Excerpt of a kernel-launch routine; its signature, braces and several branch bodies
// are not visible in this view, so the notes below cover only the statements shown.
// Visible flow: size the three inputs (v, v2, v3), create one device buffer per input,
// bind them as kernel arguments 0-2, upload the host data, run the kernel, and copy
// `buffer` back into v and `buffer2` back into v2.
// Element counts of the three inputs (v and v2 are accessed through pointers, v3 directly).
8725 size_t sz = v->size();
8726 size_t sz2 = v2->size();
8727 size_t sz3 = v3.size();
// Byte sizes; the sizeof factors suggest char, float and int elements respectively.
8728 size_t typesz =
sizeof(char) * sz;
8729 size_t typesz2 =
sizeof(float) * sz2;
8730 size_t typesz3 =
sizeof(int) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
8731 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a custom size list that is longer / shorter than three entries; the
// conditions guarding them are not visible here.
8735 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8742 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per input. NOTE(review): each call overwrites `error` and no check
// is visible between them - confirm the status is inspected.
8746 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8748 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
8750 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2; the returned status codes are discarded.
8752 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
8753 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
8754 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
8755 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device copies; their status codes are discarded.
// NOTE(review): if these are std::vector objects, at(0) throws on empty input and &v3[0]
// on an empty vector is not valid either - confirm callers never pass empty containers.
8757 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
8759 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
8761 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per input length.
8764 size_t size[3] = {sz, sz2, sz3};
8765 size_t work_dimension = 3;
// True when params is NULL, or when neither multi_dimensional is set nor temp_sz > 0.
// NOTE(review): temp_sz was initialised from buffers_size above, while the warnings
// below talk about global_work_size - presumably it is reassigned in lines not shown; confirm.
8768 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
8771 else if(temp_sz > 0){
8773 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 8774 For default multidimensional global work size, leave the global_work_size vector empty, \ 8775 and set multi_dimensional to true. Setting the global work size based on the values inside \ 8776 the global_work_size vector.");
8782 else if (temp_sz == 2){
8792 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no explicit local work size, then wait for it.
8799 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8801 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the read-back. NOTE(review): the malloc result is not checked
// and no free(result) is visible in this excerpt - confirm it is released.
8803 char *result = (
char *) malloc(typesz);
// Blocking read of `buffer` into the staging memory, then overwrite v with those sz bytes.
8804 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8806 v->assign(result, result+sz);
// Read-back of v2. When its byte size or length differs from v's, a dedicated float
// staging buffer is allocated (result2's declaration is not visible here).
// NOTE(review): no free(result2) is visible either - confirm it is released.
8808 if (typesz2 != typesz or sz != sz2){
8810 result2 = (
float *) malloc(typesz2);
8811 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
8813 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the else branch (the `else` line itself is not shown): buffer2
// is read into the char staging buffer and v2 is filled from char values rather than
// floats. With typesz2 = sizeof(float) * sz2 and typesz = sz, this path looks reachable
// only for empty input on typical platforms - confirm it is intended.
8817 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8819 v2->assign(result, result+sz2);
// Release the queue, the three buffers and the kernel event; status codes are discarded.
8822 clReleaseCommandQueue (queue);
8823 clReleaseMemObject(buffer);
8824 clReleaseMemObject(buffer2);
8825 clReleaseMemObject(buffer3);
8826 clReleaseEvent(gpuExec);
// Excerpt of a kernel-launch routine; its signature, braces and several branch bodies
// are not visible in this view, so the notes below cover only the statements shown.
// Visible flow: size the three inputs (v, v2, v3), create one device buffer per input,
// bind them as kernel arguments 0-2, upload the host data, run the kernel, and copy
// the device buffers back into v, v2 and v3.
// Element counts of the three inputs (all accessed through pointers).
8831 size_t sz = v->size();
8832 size_t sz2 = v2->size();
8833 size_t sz3 = v3->size();
// Byte sizes; the sizeof factors suggest char, float and int elements respectively.
8834 size_t typesz =
sizeof(char) * sz;
8835 size_t typesz2 =
sizeof(float) * sz2;
8836 size_t typesz3 =
sizeof(int) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
8837 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a custom size list that is longer / shorter than three entries; the
// conditions guarding them are not visible here.
8841 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8848 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per input. NOTE(review): each call overwrites `error` and no check
// is visible between them - confirm the status is inspected.
8852 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8854 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
8856 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2; the returned status codes are discarded.
8858 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
8859 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
8860 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
8861 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device copies; their status codes are discarded.
// NOTE(review): if these are std::vector objects, at(0) throws on empty input - confirm
// callers never pass empty containers.
8863 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
8865 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
8867 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Default global work size: one dimension per input length.
8870 size_t size[3] = {sz, sz2, sz3};
8871 size_t work_dimension = 3;
// True when params is NULL, or when neither multi_dimensional is set nor temp_sz > 0.
// NOTE(review): temp_sz was initialised from buffers_size above, while the warnings
// below talk about global_work_size - presumably it is reassigned in lines not shown; confirm.
8874 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
8877 else if(temp_sz > 0){
8879 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 8880 For default multidimensional global work size, leave the global_work_size vector empty, \ 8881 and set multi_dimensional to true. Setting the global work size based on the values inside \ 8882 the global_work_size vector.");
8888 else if (temp_sz == 2){
8898 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no explicit local work size, then wait for it.
8905 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
8907 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the read-back. NOTE(review): the malloc result is not checked
// and no free(result) is visible in this excerpt - confirm it is released.
8909 char *result = (
char *) malloc(typesz);
// Blocking read of `buffer` into the staging memory, then overwrite v with those sz bytes.
8910 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
8912 v->assign(result, result+sz);
// Read-back of v2. When its byte size or length differs from v's, a dedicated float
// staging buffer is allocated (result2's declaration is not visible here).
// NOTE(review): no free(result2) is visible either - confirm it is released.
8914 if (typesz2 != typesz or sz != sz2){
8916 result2 = (
float *) malloc(typesz2);
8917 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
8919 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the else branch (the `else` line itself is not shown): buffer2
// is read into the char staging buffer and v2 is filled from char values rather than
// floats. With typesz2 = sizeof(float) * sz2 and typesz = sz, this path looks reachable
// only for empty input on typical platforms - confirm it is intended.
8923 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8925 v2->assign(result, result+sz2);
// Read-back of v3, same pattern with an int staging buffer result3 (declaration and
// any free(result3) are not visible here).
8928 if (typesz3 != typesz or sz != sz3){
8930 result3 = (
int *) malloc(typesz3);
8931 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
8933 v3->assign(result3, result3+sz3);
// NOTE(review): presumably the else branch. It reads buffer2 with typesz2 again instead of
// buffer3/typesz3 before assigning to v3 - looks like a copy-paste slip. With
// typesz3 = sizeof(int) * sz3 and typesz = sz the path looks reachable only for empty
// input on typical platforms, but it should still target buffer3 - confirm and fix.
8937 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
8939 v3->assign(result, result+sz3);
// Release the queue, the three buffers and the kernel event; status codes are discarded.
8942 clReleaseCommandQueue (queue);
8943 clReleaseMemObject(buffer);
8944 clReleaseMemObject(buffer2);
8945 clReleaseMemObject(buffer3);
8946 clReleaseEvent(gpuExec);
// Excerpt of a kernel-launch routine; its signature, braces and several branch bodies
// are not visible in this view, so the notes below cover only the statements shown.
// Visible flow: size the three inputs (v, v2, v3), create one device buffer per input,
// bind them as kernel arguments 0-2, upload the host data, run the kernel, and copy
// `buffer` back into a local vector `res`.
// Element counts of the three inputs (all accessed directly, not through pointers).
8952 size_t sz = v.size();
8953 size_t sz2 = v2.size();
8954 size_t sz3 = v3.size();
// Byte sizes; the sizeof factors suggest char, float and float elements respectively.
8955 size_t typesz =
sizeof(char) * sz;
8956 size_t typesz2 =
sizeof(float) * sz2;
8957 size_t typesz3 =
sizeof(float) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
8958 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a custom size list that is longer / shorter than three entries; the
// conditions guarding them are not visible here.
8962 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
8969 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per input. NOTE(review): each call overwrites `error` and no check
// is visible between them - confirm the status is inspected.
8973 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
8975 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
8977 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2; the returned status codes are discarded.
8979 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
8980 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
8981 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
8982 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device copies; their status codes are discarded.
// NOTE(review): if these are std::vector objects, &x[0] on an empty vector is not valid -
// confirm callers never pass empty containers.
8984 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
8986 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
8988 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per input length.
8991 size_t size[3] = {sz, sz2, sz3};
8992 size_t work_dimension = 3;
// True when params is NULL, or when neither multi_dimensional is set nor temp_sz > 0.
// NOTE(review): temp_sz was initialised from buffers_size above, while the warnings
// below talk about global_work_size - presumably it is reassigned in lines not shown; confirm.
8995 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
8998 else if(temp_sz > 0){
9000 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 9001 For default multidimensional global work size, leave the global_work_size vector empty, \ 9002 and set multi_dimensional to true. Setting the global work size based on the values inside \ 9003 the global_work_size vector.");
9009 else if (temp_sz == 2){
9019 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no explicit local work size, then wait for it.
9026 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9028 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the read-back. NOTE(review): the malloc result is not checked
// and no free(result) is visible in this excerpt - confirm it is released.
9030 char *result = (
char *) malloc(typesz);
// Blocking read of `buffer` into the staging memory.
9031 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the sz result bytes into a local vector; what happens to `res` afterwards
// (presumably it is returned) is not visible here.
9033 std::vector<char> res = std::vector<char>();
9034 res.assign(result, result+sz);
// Release the queue, the three buffers and the kernel event; status codes are discarded.
9036 clReleaseCommandQueue (queue);
9037 clReleaseMemObject(buffer);
9038 clReleaseMemObject(buffer2);
9039 clReleaseMemObject(buffer3);
9040 clReleaseEvent(gpuExec);
// Excerpt of a kernel-launch routine; its signature, braces and several branch bodies
// are not visible in this view, so the notes below cover only the statements shown.
// Visible flow: size the three inputs (v, v2, v3), create one device buffer per input,
// bind them as kernel arguments 0-2, upload the host data, run the kernel, and copy
// `buffer` back into v.
// Element counts of the three inputs (v is accessed through a pointer, v2/v3 directly).
9047 size_t sz = v->size();
9048 size_t sz2 = v2.size();
9049 size_t sz3 = v3.size();
// Byte sizes; the sizeof factors suggest char, float and float elements respectively.
9050 size_t typesz =
sizeof(char) * sz;
9051 size_t typesz2 =
sizeof(float) * sz2;
9052 size_t typesz3 =
sizeof(float) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
9053 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a custom size list that is longer / shorter than three entries; the
// conditions guarding them are not visible here.
9057 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9064 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per input. NOTE(review): each call overwrites `error` and no check
// is visible between them - confirm the status is inspected.
9068 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9070 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
9072 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2; the returned status codes are discarded.
9074 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
9075 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
9076 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
9077 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device copies; their status codes are discarded.
// NOTE(review): if v is a std::vector, v->at(0) throws on empty input, and &v2[0] / &v3[0]
// on an empty vector is not valid either - confirm callers never pass empty containers.
9079 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
9081 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
9083 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per input length.
9086 size_t size[3] = {sz, sz2, sz3};
9087 size_t work_dimension = 3;
// True when params is NULL, or when neither multi_dimensional is set nor temp_sz > 0.
// NOTE(review): temp_sz was initialised from buffers_size above, while the warnings
// below talk about global_work_size - presumably it is reassigned in lines not shown; confirm.
9090 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
9093 else if(temp_sz > 0){
9095 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 9096 For default multidimensional global work size, leave the global_work_size vector empty, \ 9097 and set multi_dimensional to true. Setting the global work size based on the values inside \ 9098 the global_work_size vector.");
9104 else if (temp_sz == 2){
9114 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no explicit local work size, then wait for it.
9121 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9123 clWaitForEvents(1, &gpuExec);
// NOTE(review): the same launch and wait are repeated here. If both run unconditionally
// the kernel executes twice and the first event stored in gpuExec is overwritten without
// being released - confirm whether a guard sits in the lines not shown.
9125 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9127 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the read-back. NOTE(review): the malloc result is not checked
// and no free(result) is visible in this excerpt - confirm it is released.
9129 char *result = (
char *) malloc(typesz);
// Blocking read of `buffer` into the staging memory, then overwrite v with those sz bytes.
9130 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
9132 v->assign(result, result+sz);
// Release the queue, the three buffers and the kernel event; status codes are discarded.
9134 clReleaseCommandQueue (queue);
9135 clReleaseMemObject(buffer);
9136 clReleaseMemObject(buffer2);
9137 clReleaseMemObject(buffer3);
9138 clReleaseEvent(gpuExec);
// Excerpt of a kernel-launch routine; its signature, braces and several branch bodies
// are not visible in this view, so the notes below cover only the statements shown.
// Visible flow: size the three inputs (v, v2, v3), create one device buffer per input,
// bind them as kernel arguments 0-2, upload the host data, run the kernel, and copy
// `buffer` back into v and `buffer2` back into v2.
// Element counts of the three inputs (v and v2 are accessed through pointers, v3 directly).
9143 size_t sz = v->size();
9144 size_t sz2 = v2->size();
9145 size_t sz3 = v3.size();
// Byte sizes; the sizeof factors suggest char, float and float elements respectively.
9146 size_t typesz =
sizeof(char) * sz;
9147 size_t typesz2 =
sizeof(float) * sz2;
9148 size_t typesz3 =
sizeof(float) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
9149 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a custom size list that is longer / shorter than three entries; the
// conditions guarding them are not visible here.
9153 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9160 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per input. NOTE(review): each call overwrites `error` and no check
// is visible between them - confirm the status is inspected.
9164 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9166 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
9168 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2; the returned status codes are discarded.
9170 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
9171 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
9172 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
9173 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device copies; their status codes are discarded.
// NOTE(review): if these are std::vector objects, at(0) throws on empty input and &v3[0]
// on an empty vector is not valid either - confirm callers never pass empty containers.
9175 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
9177 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
9179 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per input length.
9182 size_t size[3] = {sz, sz2, sz3};
9183 size_t work_dimension = 3;
// True when params is NULL, or when neither multi_dimensional is set nor temp_sz > 0.
// NOTE(review): temp_sz was initialised from buffers_size above, while the warnings
// below talk about global_work_size - presumably it is reassigned in lines not shown; confirm.
9186 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
9189 else if(temp_sz > 0){
9191 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 9192 For default multidimensional global work size, leave the global_work_size vector empty, \ 9193 and set multi_dimensional to true. Setting the global work size based on the values inside \ 9194 the global_work_size vector.");
9200 else if (temp_sz == 2){
9210 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no explicit local work size, then wait for it.
9217 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9219 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the read-back. NOTE(review): the malloc result is not checked
// and no free(result) is visible in this excerpt - confirm it is released.
9221 char *result = (
char *) malloc(typesz);
// Blocking read of `buffer` into the staging memory, then overwrite v with those sz bytes.
9222 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
9224 v->assign(result, result+sz);
// Read-back of v2. When its byte size or length differs from v's, a dedicated float
// staging buffer is allocated (result2's declaration is not visible here).
// NOTE(review): no free(result2) is visible either - confirm it is released.
9226 if (typesz2 != typesz or sz != sz2){
9228 result2 = (
float *) malloc(typesz2);
9229 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
9231 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the else branch (the `else` line itself is not shown): buffer2
// is read into the char staging buffer and v2 is filled from char values rather than
// floats. With typesz2 = sizeof(float) * sz2 and typesz = sz, this path looks reachable
// only for empty input on typical platforms - confirm it is intended.
9235 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
9237 v2->assign(result, result+sz2);
// Release the queue, the three buffers and the kernel event; status codes are discarded.
9240 clReleaseCommandQueue (queue);
9241 clReleaseMemObject(buffer);
9242 clReleaseMemObject(buffer2);
9243 clReleaseMemObject(buffer3);
9244 clReleaseEvent(gpuExec);
// Excerpt of a kernel-launch routine; its signature, braces and several branch bodies
// are not visible in this view, so the notes below cover only the statements shown.
// Visible flow: size the three inputs (v, v2, v3), create one device buffer per input,
// bind them as kernel arguments 0-2, upload the host data, run the kernel, and copy
// the device buffers back into v, v2 and v3.
// Element counts of the three inputs (all accessed through pointers).
9249 size_t sz = v->size();
9250 size_t sz2 = v2->size();
9251 size_t sz3 = v3->size();
// Byte sizes; the sizeof factors suggest char, float and float elements respectively.
9252 size_t typesz =
sizeof(char) * sz;
9253 size_t typesz2 =
sizeof(float) * sz2;
9254 size_t typesz3 =
sizeof(float) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
9255 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a custom size list that is longer / shorter than three entries; the
// conditions guarding them are not visible here.
9259 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9266 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per input. NOTE(review): each call overwrites `error` and no check
// is visible between them - confirm the status is inspected.
9270 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9272 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
9274 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2; the returned status codes are discarded.
9276 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
9277 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
9278 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
9279 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device copies; their status codes are discarded.
// NOTE(review): if these are std::vector objects, at(0) throws on empty input - confirm
// callers never pass empty containers.
9281 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
9283 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
9285 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Default global work size: one dimension per input length.
9288 size_t size[3] = {sz, sz2, sz3};
9289 size_t work_dimension = 3;
// True when params is NULL, or when neither multi_dimensional is set nor temp_sz > 0.
// NOTE(review): temp_sz was initialised from buffers_size above, while the warnings
// below talk about global_work_size - presumably it is reassigned in lines not shown; confirm.
9292 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
9295 else if(temp_sz > 0){
9297 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 9298 For default multidimensional global work size, leave the global_work_size vector empty, \ 9299 and set multi_dimensional to true. Setting the global work size based on the values inside \ 9300 the global_work_size vector.");
9306 else if (temp_sz == 2){
9316 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no explicit local work size, then wait for it.
9323 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9325 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the read-back. NOTE(review): the malloc result is not checked
// and no free(result) is visible in this excerpt - confirm it is released.
9327 char *result = (
char *) malloc(typesz);
// Blocking read of `buffer` into the staging memory, then overwrite v with those sz bytes.
9328 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
9330 v->assign(result, result+sz);
// Read-back of v2. When its byte size or length differs from v's, a dedicated float
// staging buffer is allocated (result2's declaration is not visible here).
// NOTE(review): no free(result2) is visible either - confirm it is released.
9332 if (typesz2 != typesz or sz != sz2){
9334 result2 = (
float *) malloc(typesz2);
9335 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
9337 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the else branch (the `else` line itself is not shown): buffer2
// is read into the char staging buffer and v2 is filled from char values rather than
// floats. With typesz2 = sizeof(float) * sz2 and typesz = sz, this path looks reachable
// only for empty input on typical platforms - confirm it is intended.
9341 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
9343 v2->assign(result, result+sz2);
// Read-back of v3, same pattern with a float staging buffer result3 (declaration and
// any free(result3) are not visible here).
9346 if (typesz3 != typesz or sz != sz3){
9348 result3 = (
float *) malloc(typesz3);
9349 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
9351 v3->assign(result3, result3+sz3);
// NOTE(review): presumably the else branch. It reads buffer2 with typesz2 again instead of
// buffer3/typesz3 before assigning to v3 - looks like a copy-paste slip. With
// typesz3 = sizeof(float) * sz3 and typesz = sz the path looks reachable only for empty
// input on typical platforms, but it should still target buffer3 - confirm and fix.
9355 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
9357 v3->assign(result, result+sz3);
// Release the queue, the three buffers and the kernel event; status codes are discarded.
9360 clReleaseCommandQueue (queue);
9361 clReleaseMemObject(buffer);
9362 clReleaseMemObject(buffer2);
9363 clReleaseMemObject(buffer3);
9364 clReleaseEvent(gpuExec);
// Launch sequence for three host vectors accessed as objects (v, v2, v3), bound to kernel
// arguments 0, 1 and 2. Only `buffer` (argument 0) is read back, into the local vector `res`.
// NOTE(review): the enclosing signature and several statements are not part of the visible lines.
9370 size_t sz = v.size();
9371 size_t sz2 = v2.size();
9372 size_t sz3 = v3.size();
// Byte sizes are computed with sizeof(char), sizeof(float) and sizeof(double) respectively.
9373 size_t typesz =
sizeof(char) * sz;
9374 size_t typesz2 =
sizeof(float) * sz2;
9375 size_t typesz3 =
sizeof(double) * sz3;
// Number of custom buffer sizes supplied through params (0 when params is NULL).
9376 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
9380 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9387 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// NOTE(review): buffer2/buffer3 are created CL_MEM_WRITE_ONLY, yet they are filled from the host
// below and never read back in the visible lines. If the kernel reads them, CL_MEM_READ_ONLY
// (or CL_MEM_READ_WRITE) is presumably what was meant - confirm against the kernel source.
9391 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9393 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
9395 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// The return codes of clSetKernelArg and clEnqueueWriteBuffer are discarded here, unlike the
// enqueue/read calls further down that are wrapped in checkError.
9397 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
9398 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
9399 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue is created on deviceIds[0] for this call and released at the end.
9400 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the three host vectors.
// NOTE(review): &v[0] is not a valid pointer for an empty vector - confirm callers never pass one.
9402 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
9404 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
9406 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one entry per vector length, three dimensions.
9409 size_t size[3] = {sz, sz2, sz3};
9410 size_t work_dimension = 3;
// Taken when params is NULL, or when params neither sets multi_dimensional nor has temp_sz > 0.
9413 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
9416 else if(temp_sz > 0){
9418 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 9419 For default multidimensional global work size, leave the global_work_size vector empty, \ 9420 and set multi_dimensional to true. Setting the global work size based on the values inside \ 9421 the global_work_size vector.");
9427 else if (temp_sz == 2){
9437 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Enqueue the kernel over `size` with `work_dimension` dimensions, then block on its event.
9444 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9446 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` (typesz bytes) into a malloc'd staging area.
// NOTE(review): the malloc result is not NULL-checked and no free(result) is visible here - confirm.
9448 char *result = (
char *) malloc(typesz);
9449 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the first sz bytes of the staging area into the result vector.
9451 std::vector<char> res = std::vector<char>();
9452 res.assign(result, result+sz);
// Release the per-call OpenCL objects.
9454 clReleaseCommandQueue (queue);
9455 clReleaseMemObject(buffer);
9456 clReleaseMemObject(buffer2);
9457 clReleaseMemObject(buffer3);
9458 clReleaseEvent(gpuExec);
// Launch sequence where v is accessed through a pointer and v2/v3 as objects; the three are bound
// to kernel arguments 0, 1 and 2. After execution `buffer` is read back and assigned into *v.
// NOTE(review): the enclosing signature and several statements are not part of the visible lines.
9465 size_t sz = v->size();
9466 size_t sz2 = v2.size();
9467 size_t sz3 = v3.size();
// Byte sizes are computed with sizeof(char), sizeof(float) and sizeof(double) respectively.
9468 size_t typesz =
sizeof(char) * sz;
9469 size_t typesz2 =
sizeof(float) * sz2;
9470 size_t typesz3 =
sizeof(double) * sz3;
// Number of custom buffer sizes supplied through params (0 when params is NULL).
9471 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
9475 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9482 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// NOTE(review): buffer2/buffer3 are created CL_MEM_WRITE_ONLY, yet they are filled from the host
// below and never read back in the visible lines. If the kernel reads them, CL_MEM_READ_ONLY
// (or CL_MEM_READ_WRITE) is presumably what was meant - confirm against the kernel source.
9486 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9488 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
9490 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// The return codes of clSetKernelArg and clEnqueueWriteBuffer are discarded here, unlike the
// enqueue/read calls further down that are wrapped in checkError.
9492 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
9493 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
9494 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue is created on deviceIds[0] for this call and released at the end.
9495 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads. v->at(0) throws std::out_of_range when *v is empty;
// NOTE(review): &v2[0] / &v3[0] are not valid pointers for empty vectors - confirm callers.
9497 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
9499 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
9501 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one entry per vector length, three dimensions.
9504 size_t size[3] = {sz, sz2, sz3};
9505 size_t work_dimension = 3;
// Taken when params is NULL, or when params neither sets multi_dimensional nor has temp_sz > 0.
9508 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
9511 else if(temp_sz > 0){
9513 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 9514 For default multidimensional global work size, leave the global_work_size vector empty, \ 9515 and set multi_dimensional to true. Setting the global work size based on the values inside \ 9516 the global_work_size vector.");
9522 else if (temp_sz == 2){
9532 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Enqueue the kernel over `size` with `work_dimension` dimensions, then block on its event.
9539 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9541 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and waited on a second time with identical arguments.
// gpuExec is overwritten by this call while only one clReleaseEvent is visible below, so the
// first event handle looks leaked - confirm whether the second launch is intentional.
9543 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9545 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` (typesz bytes) into a malloc'd staging area.
// NOTE(review): the malloc result is not NULL-checked and no free(result) is visible here - confirm.
9547 char *result = (
char *) malloc(typesz);
9548 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz bytes read back.
9550 v->assign(result, result+sz);
// Release the per-call OpenCL objects.
9552 clReleaseCommandQueue (queue);
9553 clReleaseMemObject(buffer);
9554 clReleaseMemObject(buffer2);
9555 clReleaseMemObject(buffer3);
9556 clReleaseEvent(gpuExec);
// Launch sequence where v and v2 are accessed through pointers and v3 as an object; the three are
// bound to kernel arguments 0, 1 and 2. After execution `buffer` and `buffer2` are read back and
// assigned into *v and *v2.
// NOTE(review): the enclosing signature and several statements are not part of the visible lines.
9561 size_t sz = v->size();
9562 size_t sz2 = v2->size();
9563 size_t sz3 = v3.size();
// Byte sizes are computed with sizeof(char), sizeof(float) and sizeof(double) respectively.
9564 size_t typesz =
sizeof(char) * sz;
9565 size_t typesz2 =
sizeof(float) * sz2;
9566 size_t typesz3 =
sizeof(double) * sz3;
// Number of custom buffer sizes supplied through params (0 when params is NULL).
9567 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
9571 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9578 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// NOTE(review): buffer3 is created CL_MEM_WRITE_ONLY, yet it is filled from the host below and
// never read back in the visible lines. If the kernel reads it, CL_MEM_READ_ONLY
// (or CL_MEM_READ_WRITE) is presumably what was meant - confirm against the kernel source.
9582 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9584 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
9586 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// The return codes of clSetKernelArg and clEnqueueWriteBuffer are discarded here, unlike the
// enqueue/read calls further down that are wrapped in checkError.
9588 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
9589 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
9590 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue is created on deviceIds[0] for this call and released at the end.
9591 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads. at(0) throws std::out_of_range when the pointed-to vector is empty;
// NOTE(review): &v3[0] is not a valid pointer for an empty vector - confirm callers.
9593 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
9595 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
9597 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one entry per vector length, three dimensions.
9600 size_t size[3] = {sz, sz2, sz3};
9601 size_t work_dimension = 3;
// Taken when params is NULL, or when params neither sets multi_dimensional nor has temp_sz > 0.
9604 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
9607 else if(temp_sz > 0){
9609 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 9610 For default multidimensional global work size, leave the global_work_size vector empty, \ 9611 and set multi_dimensional to true. Setting the global work size based on the values inside \ 9612 the global_work_size vector.");
9618 else if (temp_sz == 2){
9628 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Enqueue the kernel over `size` with `work_dimension` dimensions, then block on its event.
9635 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9637 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` (typesz bytes) into a malloc'd staging area.
// NOTE(review): the malloc results are not NULL-checked and no free() is visible here - confirm.
9639 char *result = (
char *) malloc(typesz);
9640 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz bytes read back.
9642 v->assign(result, result+sz);
// v2 read-back: when v2's byte size or length differs from v's, a separate float staging buffer
// (result2) is allocated and read into.
9644 if (typesz2 != typesz or sz != sz2){
9646 result2 = (
float *) malloc(typesz2);
9647 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
9649 v2->assign(result2, result2+sz2);
// Presumably the else branch (its opening line is not visible): the char staging buffer
// `result` is reused for buffer2.
9653 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
9655 v2->assign(result, result+sz2);
// Release the per-call OpenCL objects.
9658 clReleaseCommandQueue (queue);
9659 clReleaseMemObject(buffer);
9660 clReleaseMemObject(buffer2);
9661 clReleaseMemObject(buffer3);
9662 clReleaseEvent(gpuExec);
// Launch sequence where v, v2 and v3 are all accessed through pointers; the three are bound to
// kernel arguments 0, 1 and 2. After execution all three device buffers are read back and
// assigned into *v, *v2 and *v3.
// NOTE(review): the enclosing signature and several statements are not part of the visible lines.
9667 size_t sz = v->size();
9668 size_t sz2 = v2->size();
9669 size_t sz3 = v3->size();
// Byte sizes are computed with sizeof(char), sizeof(float) and sizeof(double) respectively.
9670 size_t typesz =
sizeof(char) * sz;
9671 size_t typesz2 =
sizeof(float) * sz2;
9672 size_t typesz3 =
sizeof(double) * sz3;
// Number of custom buffer sizes supplied through params (0 when params is NULL).
9673 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
9677 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9684 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// All three buffers are read back below, hence CL_MEM_READ_WRITE for each.
9688 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9690 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
9692 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// The return codes of clSetKernelArg and clEnqueueWriteBuffer are discarded here, unlike the
// enqueue/read calls further down that are wrapped in checkError.
9694 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
9695 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
9696 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue is created on deviceIds[0] for this call and released at the end.
9697 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads. at(0) throws std::out_of_range when the pointed-to vector is empty.
9699 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
9701 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
9703 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Default global work size: one entry per vector length, three dimensions.
9706 size_t size[3] = {sz, sz2, sz3};
9707 size_t work_dimension = 3;
// Taken when params is NULL, or when params neither sets multi_dimensional nor has temp_sz > 0.
9710 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
9713 else if(temp_sz > 0){
9715 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 9716 For default multidimensional global work size, leave the global_work_size vector empty, \ 9717 and set multi_dimensional to true. Setting the global work size based on the values inside \ 9718 the global_work_size vector.");
9724 else if (temp_sz == 2){
9734 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Enqueue the kernel over `size` with `work_dimension` dimensions, then block on its event.
9741 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9743 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` (typesz bytes) into a malloc'd staging area.
// NOTE(review): the malloc results are not NULL-checked and no free() is visible here - confirm.
9745 char *result = (
char *) malloc(typesz);
9746 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz bytes read back.
9748 v->assign(result, result+sz);
// v2 read-back: when v2's byte size or length differs from v's, a separate float staging buffer
// (result2) is allocated and read into.
9750 if (typesz2 != typesz or sz != sz2){
9752 result2 = (
float *) malloc(typesz2);
9753 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
9755 v2->assign(result2, result2+sz2);
// Presumably the else branch (its opening line is not visible): the char staging buffer
// `result` is reused for buffer2.
9759 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
9761 v2->assign(result, result+sz2);
// v3 read-back: same scheme, with a double staging buffer (result3) when sizes differ.
9764 if (typesz3 != typesz or sz != sz3){
9766 result3 = (
double *) malloc(typesz3);
9767 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
9769 v3->assign(result3, result3+sz3);
// Fallback path for v3 reusing `result`. This must read buffer3 with typesz3: the previous code
// read buffer2/typesz2 here, which returned v2's data for v3 and could write more than the
// typesz bytes allocated for `result`.
9773 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
9775 v3->assign(result, result+sz3);
// Release the per-call OpenCL objects.
9778 clReleaseCommandQueue (queue);
9779 clReleaseMemObject(buffer);
9780 clReleaseMemObject(buffer2);
9781 clReleaseMemObject(buffer3);
9782 clReleaseEvent(gpuExec);
// Launch sequence for three host vectors accessed as objects (v, v2, v3), bound to kernel
// arguments 0, 1 and 2. Only `buffer` (argument 0) is read back, into the local vector `res`.
// NOTE(review): the enclosing signature and several statements are not part of the visible lines.
9788 size_t sz = v.size();
9789 size_t sz2 = v2.size();
9790 size_t sz3 = v3.size();
// Byte sizes are computed with sizeof(char), sizeof(double) and sizeof(char) respectively.
9791 size_t typesz =
sizeof(char) * sz;
9792 size_t typesz2 =
sizeof(double) * sz2;
9793 size_t typesz3 =
sizeof(char) * sz3;
// Number of custom buffer sizes supplied through params (0 when params is NULL).
9794 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
9798 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9805 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// NOTE(review): buffer2/buffer3 are created CL_MEM_WRITE_ONLY, yet they are filled from the host
// below and never read back in the visible lines. If the kernel reads them, CL_MEM_READ_ONLY
// (or CL_MEM_READ_WRITE) is presumably what was meant - confirm against the kernel source.
9809 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9811 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
9813 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// The return codes of clSetKernelArg and clEnqueueWriteBuffer are discarded here, unlike the
// enqueue/read calls further down that are wrapped in checkError.
9815 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
9816 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
9817 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue is created on deviceIds[0] for this call and released at the end.
9818 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the three host vectors.
// NOTE(review): &v[0] is not a valid pointer for an empty vector - confirm callers never pass one.
9820 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
9822 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
9824 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one entry per vector length, three dimensions.
9827 size_t size[3] = {sz, sz2, sz3};
9828 size_t work_dimension = 3;
// Taken when params is NULL, or when params neither sets multi_dimensional nor has temp_sz > 0.
9831 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
9834 else if(temp_sz > 0){
9836 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 9837 For default multidimensional global work size, leave the global_work_size vector empty, \ 9838 and set multi_dimensional to true. Setting the global work size based on the values inside \ 9839 the global_work_size vector.");
9845 else if (temp_sz == 2){
9855 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Enqueue the kernel over `size` with `work_dimension` dimensions, then block on its event.
9862 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9864 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` (typesz bytes) into a malloc'd staging area.
// NOTE(review): the malloc result is not NULL-checked and no free(result) is visible here - confirm.
9866 char *result = (
char *) malloc(typesz);
9867 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the first sz bytes of the staging area into the result vector.
9869 std::vector<char> res = std::vector<char>();
9870 res.assign(result, result+sz);
// Release the per-call OpenCL objects.
9872 clReleaseCommandQueue (queue);
9873 clReleaseMemObject(buffer);
9874 clReleaseMemObject(buffer2);
9875 clReleaseMemObject(buffer3);
9876 clReleaseEvent(gpuExec);
// Launch sequence where v is accessed through a pointer and v2/v3 as objects; the three are bound
// to kernel arguments 0, 1 and 2. After execution `buffer` is read back and assigned into *v.
// NOTE(review): the enclosing signature and several statements are not part of the visible lines.
9883 size_t sz = v->size();
9884 size_t sz2 = v2.size();
9885 size_t sz3 = v3.size();
// Byte sizes are computed with sizeof(char), sizeof(double) and sizeof(char) respectively.
9886 size_t typesz =
sizeof(char) * sz;
9887 size_t typesz2 =
sizeof(double) * sz2;
9888 size_t typesz3 =
sizeof(char) * sz3;
// Number of custom buffer sizes supplied through params (0 when params is NULL).
9889 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
9893 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9900 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// NOTE(review): buffer2/buffer3 are created CL_MEM_WRITE_ONLY, yet they are filled from the host
// below and never read back in the visible lines. If the kernel reads them, CL_MEM_READ_ONLY
// (or CL_MEM_READ_WRITE) is presumably what was meant - confirm against the kernel source.
9904 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
9906 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
9908 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// The return codes of clSetKernelArg and clEnqueueWriteBuffer are discarded here, unlike the
// enqueue/read calls further down that are wrapped in checkError.
9910 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
9911 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
9912 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue is created on deviceIds[0] for this call and released at the end.
9913 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads. v->at(0) throws std::out_of_range when *v is empty;
// NOTE(review): &v2[0] / &v3[0] are not valid pointers for empty vectors - confirm callers.
9915 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
9917 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
9919 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one entry per vector length, three dimensions.
9922 size_t size[3] = {sz, sz2, sz3};
9923 size_t work_dimension = 3;
// Taken when params is NULL, or when params neither sets multi_dimensional nor has temp_sz > 0.
9926 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
9929 else if(temp_sz > 0){
9931 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 9932 For default multidimensional global work size, leave the global_work_size vector empty, \ 9933 and set multi_dimensional to true. Setting the global work size based on the values inside \ 9934 the global_work_size vector.");
9940 else if (temp_sz == 2){
9950 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Enqueue the kernel over `size` with `work_dimension` dimensions, then block on its event.
9957 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9959 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and waited on a second time with identical arguments.
// gpuExec is overwritten by this call while only one clReleaseEvent is visible below, so the
// first event handle looks leaked - confirm whether the second launch is intentional.
9961 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
9963 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` (typesz bytes) into a malloc'd staging area.
// NOTE(review): the malloc result is not NULL-checked and no free(result) is visible here - confirm.
9965 char *result = (
char *) malloc(typesz);
9966 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz bytes read back.
9968 v->assign(result, result+sz);
// Release the per-call OpenCL objects.
9970 clReleaseCommandQueue (queue);
9971 clReleaseMemObject(buffer);
9972 clReleaseMemObject(buffer2);
9973 clReleaseMemObject(buffer3);
9974 clReleaseEvent(gpuExec);
// Launch sequence where v and v2 are accessed through pointers and v3 as an object; the three are
// bound to kernel arguments 0, 1 and 2. After execution `buffer` and `buffer2` are read back and
// assigned into *v and *v2.
// NOTE(review): the enclosing signature and several statements are not part of the visible lines.
9979 size_t sz = v->size();
9980 size_t sz2 = v2->size();
9981 size_t sz3 = v3.size();
// Byte sizes are computed with sizeof(char), sizeof(double) and sizeof(char) respectively.
9982 size_t typesz =
sizeof(char) * sz;
9983 size_t typesz2 =
sizeof(double) * sz2;
9984 size_t typesz3 =
sizeof(char) * sz3;
// Number of custom buffer sizes supplied through params (0 when params is NULL).
9985 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
9989 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
9996 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// NOTE(review): buffer3 is created CL_MEM_WRITE_ONLY, yet it is filled from the host below and
// never read back in the visible lines. If the kernel reads it, CL_MEM_READ_ONLY
// (or CL_MEM_READ_WRITE) is presumably what was meant - confirm against the kernel source.
10000 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10002 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
10004 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// The return codes of clSetKernelArg and clEnqueueWriteBuffer are discarded here, unlike the
// enqueue/read calls further down that are wrapped in checkError.
10006 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
10007 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
10008 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue is created on deviceIds[0] for this call and released at the end.
10009 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads. at(0) throws std::out_of_range when the pointed-to vector is empty;
// NOTE(review): &v3[0] is not a valid pointer for an empty vector - confirm callers.
10011 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10013 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
10015 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one entry per vector length, three dimensions.
10018 size_t size[3] = {sz, sz2, sz3};
10019 size_t work_dimension = 3;
// Taken when params is NULL, or when params neither sets multi_dimensional nor has temp_sz > 0:
// the launch becomes one-dimensional.
10022 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
10023 work_dimension = 1;
10025 else if(temp_sz > 0){
10027 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 10028 For default multidimensional global work size, leave the global_work_size vector empty, \ 10029 and set multi_dimensional to true. Setting the global work size based on the values inside \ 10030 the global_work_size vector.");
10034 work_dimension = 1;
10036 else if (temp_sz == 2){
10039 work_dimension = 2;
10046 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Enqueue the kernel over `size` with `work_dimension` dimensions, then block on its event.
10053 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10055 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` (typesz bytes) into a malloc'd staging area.
// NOTE(review): the malloc results are not NULL-checked and no free() is visible here - confirm.
10057 char *result = (
char *) malloc(typesz);
10058 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz bytes read back.
10060 v->assign(result, result+sz);
// v2 read-back: when v2's byte size or length differs from v's, a separate double staging buffer
// (result2) is allocated and read into.
10062 if (typesz2 != typesz or sz != sz2){
10064 result2 = (
double *) malloc(typesz2);
10065 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
10067 v2->assign(result2, result2+sz2);
// Presumably the else branch (its opening line is not visible): the char staging buffer
// `result` is reused for buffer2.
10071 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
10073 v2->assign(result, result+sz2);
// Release the per-call OpenCL objects.
10076 clReleaseCommandQueue (queue);
10077 clReleaseMemObject(buffer);
10078 clReleaseMemObject(buffer2);
10079 clReleaseMemObject(buffer3);
10080 clReleaseEvent(gpuExec);
// Launch sequence where v, v2 and v3 are all accessed through pointers; the three are bound to
// kernel arguments 0, 1 and 2. After execution all three device buffers are read back and
// assigned into *v, *v2 and *v3.
// NOTE(review): the enclosing signature and several statements are not part of the visible lines.
10085 size_t sz = v->size();
10086 size_t sz2 = v2->size();
10087 size_t sz3 = v3->size();
// Byte sizes are computed with sizeof(char), sizeof(double) and sizeof(char) respectively.
10088 size_t typesz =
sizeof(char) * sz;
10089 size_t typesz2 =
sizeof(double) * sz2;
10090 size_t typesz3 =
sizeof(char) * sz3;
// Number of custom buffer sizes supplied through params (0 when params is NULL).
10091 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
10095 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10102 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// All three buffers are read back below, hence CL_MEM_READ_WRITE for each.
10106 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10108 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
10110 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// The return codes of clSetKernelArg and clEnqueueWriteBuffer are discarded here, unlike the
// enqueue/read calls further down that are wrapped in checkError.
10112 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
10113 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
10114 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue is created on deviceIds[0] for this call and released at the end.
10115 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads. at(0) throws std::out_of_range when the pointed-to vector is empty.
10117 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10119 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
10121 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Default global work size: one entry per vector length, three dimensions.
10124 size_t size[3] = {sz, sz2, sz3};
10125 size_t work_dimension = 3;
// Taken when params is NULL, or when params neither sets multi_dimensional nor has temp_sz > 0:
// the launch becomes one-dimensional.
10128 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
10129 work_dimension = 1;
10131 else if(temp_sz > 0){
10133 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 10134 For default multidimensional global work size, leave the global_work_size vector empty, \ 10135 and set multi_dimensional to true. Setting the global work size based on the values inside \ 10136 the global_work_size vector.");
10140 work_dimension = 1;
10142 else if (temp_sz == 2){
10145 work_dimension = 2;
10152 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Enqueue the kernel over `size` with `work_dimension` dimensions, then block on its event.
10159 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10161 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` (typesz bytes) into a malloc'd staging area.
// NOTE(review): the malloc results are not NULL-checked and no free() is visible here - confirm.
10163 char *result = (
char *) malloc(typesz);
10164 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz bytes read back.
10166 v->assign(result, result+sz);
// v2 read-back: when v2's byte size or length differs from v's, a separate double staging buffer
// (result2) is allocated and read into.
10168 if (typesz2 != typesz or sz != sz2){
10170 result2 = (
double *) malloc(typesz2);
10171 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
10173 v2->assign(result2, result2+sz2);
// Presumably the else branch (its opening line is not visible): the char staging buffer
// `result` is reused for buffer2.
10177 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
10179 v2->assign(result, result+sz2);
// v3 read-back: same scheme, with a separate char staging buffer (result3) when sizes differ.
10182 if (typesz3 != typesz or sz != sz3){
10184 result3 = (
char *) malloc(typesz3);
10185 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
10187 v3->assign(result3, result3+sz3);
// Fallback path for v3 reusing `result`. This must read buffer3 with typesz3: the previous code
// read buffer2/typesz2 here, which returned v2's data for v3 and, because typesz2 is a
// double-sized byte count, could write past the typesz bytes allocated for `result`.
10191 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
10193 v3->assign(result, result+sz3);
// Release the per-call OpenCL objects.
10196 clReleaseCommandQueue (queue);
10197 clReleaseMemObject(buffer);
10198 clReleaseMemObject(buffer2);
10199 clReleaseMemObject(buffer3);
10200 clReleaseEvent(gpuExec);
// Launch sequence for three host vectors accessed as objects (v, v2, v3), bound to kernel
// arguments 0, 1 and 2. Only `buffer` (argument 0) is read back, into the local vector `res`.
// NOTE(review): the enclosing signature and several statements are not part of the visible lines.
10206 size_t sz = v.size();
10207 size_t sz2 = v2.size();
10208 size_t sz3 = v3.size();
// Byte sizes are computed with sizeof(char), sizeof(double) and sizeof(int) respectively.
10209 size_t typesz =
sizeof(char) * sz;
10210 size_t typesz2 =
sizeof(double) * sz2;
10211 size_t typesz3 =
sizeof(int) * sz3;
// Number of custom buffer sizes supplied through params (0 when params is NULL).
10212 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
10216 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10223 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// NOTE(review): buffer2/buffer3 are created CL_MEM_WRITE_ONLY, yet they are filled from the host
// below and never read back in the visible lines. If the kernel reads them, CL_MEM_READ_ONLY
// (or CL_MEM_READ_WRITE) is presumably what was meant - confirm against the kernel source.
10227 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10229 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
10231 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// The return codes of clSetKernelArg and clEnqueueWriteBuffer are discarded here, unlike the
// enqueue/read calls further down that are wrapped in checkError.
10233 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
10234 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
10235 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue is created on deviceIds[0] for this call and released at the end.
10236 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the three host vectors.
// NOTE(review): &v[0] is not a valid pointer for an empty vector - confirm callers never pass one.
10238 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
10240 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
10242 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one entry per vector length, three dimensions.
10245 size_t size[3] = {sz, sz2, sz3};
10246 size_t work_dimension = 3;
// Taken when params is NULL, or when params neither sets multi_dimensional nor has temp_sz > 0:
// the launch becomes one-dimensional.
10249 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
10250 work_dimension = 1;
10252 else if(temp_sz > 0){
10254 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 10255 For default multidimensional global work size, leave the global_work_size vector empty, \ 10256 and set multi_dimensional to true. Setting the global work size based on the values inside \ 10257 the global_work_size vector.");
10261 work_dimension = 1;
10263 else if (temp_sz == 2){
10266 work_dimension = 2;
10273 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Enqueue the kernel over `size` with `work_dimension` dimensions, then block on its event.
10280 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10282 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` (typesz bytes) into a malloc'd staging area.
// NOTE(review): the malloc result is not NULL-checked and no free(result) is visible here - confirm.
10284 char *result = (
char *) malloc(typesz);
10285 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the first sz bytes of the staging area into the result vector.
10287 std::vector<char> res = std::vector<char>();
10288 res.assign(result, result+sz);
// Release the per-call OpenCL objects.
10290 clReleaseCommandQueue (queue);
10291 clReleaseMemObject(buffer);
10292 clReleaseMemObject(buffer2);
10293 clReleaseMemObject(buffer3);
10294 clReleaseEvent(gpuExec);
// Launch sequence where v is accessed through a pointer and v2/v3 as objects; the three are bound
// to kernel arguments 0, 1 and 2. After execution `buffer` is read back and assigned into *v.
// NOTE(review): the enclosing signature and several statements are not part of the visible lines.
10301 size_t sz = v->size();
10302 size_t sz2 = v2.size();
10303 size_t sz3 = v3.size();
// Byte sizes are computed with sizeof(char), sizeof(double) and sizeof(int) respectively.
10304 size_t typesz =
sizeof(char) * sz;
10305 size_t typesz2 =
sizeof(double) * sz2;
10306 size_t typesz3 =
sizeof(int) * sz3;
// Number of custom buffer sizes supplied through params (0 when params is NULL).
10307 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
10311 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10318 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// NOTE(review): buffer2/buffer3 are created CL_MEM_WRITE_ONLY, yet they are filled from the host
// below and never read back in the visible lines. If the kernel reads them, CL_MEM_READ_ONLY
// (or CL_MEM_READ_WRITE) is presumably what was meant - confirm against the kernel source.
10322 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10324 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
10326 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// The return codes of clSetKernelArg and clEnqueueWriteBuffer are discarded here, unlike the
// enqueue/read calls further down that are wrapped in checkError.
10328 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
10329 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
10330 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue is created on deviceIds[0] for this call and released at the end.
10331 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads. v->at(0) throws std::out_of_range when *v is empty;
// NOTE(review): &v2[0] / &v3[0] are not valid pointers for empty vectors - confirm callers.
10333 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10335 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
10337 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one entry per vector length, three dimensions.
10340 size_t size[3] = {sz, sz2, sz3};
10341 size_t work_dimension = 3;
// Taken when params is NULL, or when params neither sets multi_dimensional nor has temp_sz > 0:
// the launch becomes one-dimensional.
10344 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
10345 work_dimension = 1;
10347 else if(temp_sz > 0){
10349 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 10350 For default multidimensional global work size, leave the global_work_size vector empty, \ 10351 and set multi_dimensional to true. Setting the global work size based on the values inside \ 10352 the global_work_size vector.");
10356 work_dimension = 1;
10358 else if (temp_sz == 2){
10361 work_dimension = 2;
10368 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Enqueue the kernel over `size` with `work_dimension` dimensions, then block on its event.
10375 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10377 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and waited on a second time with identical arguments.
// gpuExec is overwritten by this call while only one clReleaseEvent is visible below, so the
// first event handle looks leaked - confirm whether the second launch is intentional.
10379 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10381 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` (typesz bytes) into a malloc'd staging area.
// NOTE(review): the malloc result is not NULL-checked and no free(result) is visible here - confirm.
10383 char *result = (
char *) malloc(typesz);
10384 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz bytes read back.
10386 v->assign(result, result+sz);
// Release the per-call OpenCL objects.
10388 clReleaseCommandQueue (queue);
10389 clReleaseMemObject(buffer);
10390 clReleaseMemObject(buffer2);
10391 clReleaseMemObject(buffer3);
10392 clReleaseEvent(gpuExec);
// Launch sequence where v and v2 are accessed through pointers and v3 as an object; the three are
// bound to kernel arguments 0, 1 and 2. After execution `buffer` and `buffer2` are read back and
// assigned into *v and *v2.
// NOTE(review): the enclosing signature and several statements are not part of the visible lines.
10397 size_t sz = v->size();
10398 size_t sz2 = v2->size();
10399 size_t sz3 = v3.size();
// Byte sizes are computed with sizeof(char), sizeof(double) and sizeof(int) respectively.
10400 size_t typesz =
sizeof(char) * sz;
10401 size_t typesz2 =
sizeof(double) * sz2;
10402 size_t typesz3 =
sizeof(int) * sz3;
// Number of custom buffer sizes supplied through params (0 when params is NULL).
10403 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
10407 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10414 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// NOTE(review): buffer3 is created CL_MEM_WRITE_ONLY, yet it is filled from the host below and
// never read back in the visible lines. If the kernel reads it, CL_MEM_READ_ONLY
// (or CL_MEM_READ_WRITE) is presumably what was meant - confirm against the kernel source.
10418 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10420 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
10422 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// The return codes of clSetKernelArg and clEnqueueWriteBuffer are discarded here, unlike the
// enqueue/read calls further down that are wrapped in checkError.
10424 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
10425 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
10426 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// A command queue is created on deviceIds[0] for this call and released at the end.
10427 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads. at(0) throws std::out_of_range when the pointed-to vector is empty;
// NOTE(review): &v3[0] is not a valid pointer for an empty vector - confirm callers.
10429 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10431 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
10433 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one entry per vector length, three dimensions.
10436 size_t size[3] = {sz, sz2, sz3};
10437 size_t work_dimension = 3;
// Taken when params is NULL, or when params neither sets multi_dimensional nor has temp_sz > 0:
// the launch becomes one-dimensional.
10440 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
10441 work_dimension = 1;
10443 else if(temp_sz > 0){
10445 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 10446 For default multidimensional global work size, leave the global_work_size vector empty, \ 10447 and set multi_dimensional to true. Setting the global work size based on the values inside \ 10448 the global_work_size vector.");
10452 work_dimension = 1;
10454 else if (temp_sz == 2){
10457 work_dimension = 2;
10464 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Enqueue the kernel over `size` with `work_dimension` dimensions, then block on its event.
10471 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10473 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` (typesz bytes) into a malloc'd staging area.
// NOTE(review): the malloc results are not NULL-checked and no free() is visible here - confirm.
10475 char *result = (
char *) malloc(typesz);
10476 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Replace the contents of *v with the first sz bytes read back.
10478 v->assign(result, result+sz);
// v2 read-back: when v2's byte size or length differs from v's, a separate double staging buffer
// (result2) is allocated and read into.
10480 if (typesz2 != typesz or sz != sz2){
10482 result2 = (
double *) malloc(typesz2);
10483 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
10485 v2->assign(result2, result2+sz2);
// Presumably the else branch (its opening line is not visible): the char staging buffer
// `result` is reused for buffer2.
10489 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
10491 v2->assign(result, result+sz2);
// Release the per-call OpenCL objects.
10494 clReleaseCommandQueue (queue);
10495 clReleaseMemObject(buffer);
10496 clReleaseMemObject(buffer2);
10497 clReleaseMemObject(buffer3);
10498 clReleaseEvent(gpuExec);
10503 size_t sz = v->size();
10504 size_t sz2 = v2->size();
10505 size_t sz3 = v3->size();
10506 size_t typesz =
sizeof(char) * sz;
10507 size_t typesz2 =
sizeof(double) * sz2;
10508 size_t typesz3 =
sizeof(int) * sz3;
10509 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
10513 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10520 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
10524 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10526 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
10528 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
10530 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
10531 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
10532 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
10533 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
10535 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10537 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
10539 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
10542 size_t size[3] = {sz, sz2, sz3};
10543 size_t work_dimension = 3;
10546 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
10547 work_dimension = 1;
10549 else if(temp_sz > 0){
10551 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 10552 For default multidimensional global work size, leave the global_work_size vector empty, \ 10553 and set multi_dimensional to true. Setting the global work size based on the values inside \ 10554 the global_work_size vector.");
10558 work_dimension = 1;
10560 else if (temp_sz == 2){
10563 work_dimension = 2;
10570 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
10577 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10579 clWaitForEvents(1, &gpuExec);
10581 char *result = (
char *) malloc(typesz);
10582 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
10584 v->assign(result, result+sz);
10586 if (typesz2 != typesz or sz != sz2){
10588 result2 = (
double *) malloc(typesz2);
10589 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
10591 v2->assign(result2, result2+sz2);
10595 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
10597 v2->assign(result, result+sz2);
10600 if (typesz3 != typesz or sz != sz3){
10602 result3 = (
int *) malloc(typesz3);
10603 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
10605 v3->assign(result3, result3+sz3);
10609 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
10611 v3->assign(result, result+sz3);
10614 clReleaseCommandQueue (queue);
10615 clReleaseMemObject(buffer);
10616 clReleaseMemObject(buffer2);
10617 clReleaseMemObject(buffer3);
10618 clReleaseEvent(gpuExec);
10624 size_t sz = v.size();
10625 size_t sz2 = v2.size();
10626 size_t sz3 = v3.size();
10627 size_t typesz =
sizeof(char) * sz;
10628 size_t typesz2 =
sizeof(double) * sz2;
10629 size_t typesz3 =
sizeof(float) * sz3;
10630 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
10634 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10641 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
10645 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10647 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
10649 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
10651 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
10652 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
10653 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
10654 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
10656 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
10658 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
10660 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
10663 size_t size[3] = {sz, sz2, sz3};
10664 size_t work_dimension = 3;
10667 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
10668 work_dimension = 1;
10670 else if(temp_sz > 0){
10672 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 10673 For default multidimensional global work size, leave the global_work_size vector empty, \ 10674 and set multi_dimensional to true. Setting the global work size based on the values inside \ 10675 the global_work_size vector.");
10679 work_dimension = 1;
10681 else if (temp_sz == 2){
10684 work_dimension = 2;
10691 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
10698 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10700 clWaitForEvents(1, &gpuExec);
10702 char *result = (
char *) malloc(typesz);
10703 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
10705 std::vector<char> res = std::vector<char>();
10706 res.assign(result, result+sz);
10708 clReleaseCommandQueue (queue);
10709 clReleaseMemObject(buffer);
10710 clReleaseMemObject(buffer2);
10711 clReleaseMemObject(buffer3);
10712 clReleaseEvent(gpuExec);
10719 size_t sz = v->size();
10720 size_t sz2 = v2.size();
10721 size_t sz3 = v3.size();
10722 size_t typesz =
sizeof(char) * sz;
10723 size_t typesz2 =
sizeof(double) * sz2;
10724 size_t typesz3 =
sizeof(float) * sz3;
10725 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
10729 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10736 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
10740 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10742 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
10744 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
10746 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
10747 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
10748 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
10749 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
10751 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10753 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
10755 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
10758 size_t size[3] = {sz, sz2, sz3};
10759 size_t work_dimension = 3;
10762 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
10763 work_dimension = 1;
10765 else if(temp_sz > 0){
10767 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 10768 For default multidimensional global work size, leave the global_work_size vector empty, \ 10769 and set multi_dimensional to true. Setting the global work size based on the values inside \ 10770 the global_work_size vector.");
10774 work_dimension = 1;
10776 else if (temp_sz == 2){
10779 work_dimension = 2;
10786 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
10793 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10795 clWaitForEvents(1, &gpuExec);
10797 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10799 clWaitForEvents(1, &gpuExec);
10801 char *result = (
char *) malloc(typesz);
10802 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
10804 v->assign(result, result+sz);
10806 clReleaseCommandQueue (queue);
10807 clReleaseMemObject(buffer);
10808 clReleaseMemObject(buffer2);
10809 clReleaseMemObject(buffer3);
10810 clReleaseEvent(gpuExec);
10815 size_t sz = v->size();
10816 size_t sz2 = v2->size();
10817 size_t sz3 = v3.size();
10818 size_t typesz =
sizeof(char) * sz;
10819 size_t typesz2 =
sizeof(double) * sz2;
10820 size_t typesz3 =
sizeof(float) * sz3;
10821 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
10825 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10832 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
10836 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10838 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
10840 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
10842 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
10843 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
10844 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
10845 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
10847 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10849 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
10851 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
10854 size_t size[3] = {sz, sz2, sz3};
10855 size_t work_dimension = 3;
10858 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
10859 work_dimension = 1;
10861 else if(temp_sz > 0){
10863 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 10864 For default multidimensional global work size, leave the global_work_size vector empty, \ 10865 and set multi_dimensional to true. Setting the global work size based on the values inside \ 10866 the global_work_size vector.");
10870 work_dimension = 1;
10872 else if (temp_sz == 2){
10875 work_dimension = 2;
10882 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
10889 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10891 clWaitForEvents(1, &gpuExec);
10893 char *result = (
char *) malloc(typesz);
10894 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
10896 v->assign(result, result+sz);
10898 if (typesz2 != typesz or sz != sz2){
10900 result2 = (
double *) malloc(typesz2);
10901 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
10903 v2->assign(result2, result2+sz2);
10907 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
10909 v2->assign(result, result+sz2);
10912 clReleaseCommandQueue (queue);
10913 clReleaseMemObject(buffer);
10914 clReleaseMemObject(buffer2);
10915 clReleaseMemObject(buffer3);
10916 clReleaseEvent(gpuExec);
10921 size_t sz = v->size();
10922 size_t sz2 = v2->size();
10923 size_t sz3 = v3->size();
10924 size_t typesz =
sizeof(char) * sz;
10925 size_t typesz2 =
sizeof(double) * sz2;
10926 size_t typesz3 =
sizeof(float) * sz3;
10927 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
10931 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
10938 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
10942 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
10944 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
10946 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
10948 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
10949 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
10950 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
10951 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
10953 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
10955 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
10957 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
10960 size_t size[3] = {sz, sz2, sz3};
10961 size_t work_dimension = 3;
10964 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
10965 work_dimension = 1;
10967 else if(temp_sz > 0){
10969 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 10970 For default multidimensional global work size, leave the global_work_size vector empty, \ 10971 and set multi_dimensional to true. Setting the global work size based on the values inside \ 10972 the global_work_size vector.");
10976 work_dimension = 1;
10978 else if (temp_sz == 2){
10981 work_dimension = 2;
10988 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
10995 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
10997 clWaitForEvents(1, &gpuExec);
10999 char *result = (
char *) malloc(typesz);
11000 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11002 v->assign(result, result+sz);
11004 if (typesz2 != typesz or sz != sz2){
11006 result2 = (
double *) malloc(typesz2);
11007 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
11009 v2->assign(result2, result2+sz2);
11013 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
11015 v2->assign(result, result+sz2);
11018 if (typesz3 != typesz or sz != sz3){
11020 result3 = (
float *) malloc(typesz3);
11021 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
11023 v3->assign(result3, result3+sz3);
11027 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
11029 v3->assign(result, result+sz3);
11032 clReleaseCommandQueue (queue);
11033 clReleaseMemObject(buffer);
11034 clReleaseMemObject(buffer2);
11035 clReleaseMemObject(buffer3);
11036 clReleaseEvent(gpuExec);
11042 size_t sz = v.size();
11043 size_t sz2 = v2.size();
11044 size_t sz3 = v3.size();
11045 size_t typesz =
sizeof(char) * sz;
11046 size_t typesz2 =
sizeof(double) * sz2;
11047 size_t typesz3 =
sizeof(double) * sz3;
11048 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
11052 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11059 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11063 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11065 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
11067 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
11069 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
11070 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
11071 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
11072 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
11074 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
11076 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
11078 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
11081 size_t size[3] = {sz, sz2, sz3};
11082 size_t work_dimension = 3;
11085 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
11086 work_dimension = 1;
11088 else if(temp_sz > 0){
11090 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 11091 For default multidimensional global work size, leave the global_work_size vector empty, \ 11092 and set multi_dimensional to true. Setting the global work size based on the values inside \ 11093 the global_work_size vector.");
11097 work_dimension = 1;
11099 else if (temp_sz == 2){
11102 work_dimension = 2;
11109 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
11116 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11118 clWaitForEvents(1, &gpuExec);
11120 char *result = (
char *) malloc(typesz);
11121 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11123 std::vector<char> res = std::vector<char>();
11124 res.assign(result, result+sz);
11126 clReleaseCommandQueue (queue);
11127 clReleaseMemObject(buffer);
11128 clReleaseMemObject(buffer2);
11129 clReleaseMemObject(buffer3);
11130 clReleaseEvent(gpuExec);
11137 size_t sz = v->size();
11138 size_t sz2 = v2.size();
11139 size_t sz3 = v3.size();
11140 size_t typesz =
sizeof(char) * sz;
11141 size_t typesz2 =
sizeof(double) * sz2;
11142 size_t typesz3 =
sizeof(double) * sz3;
11143 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
11147 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11154 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11158 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11160 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
11162 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
11164 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
11165 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
11166 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
11167 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
11169 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
11171 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
11173 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
11176 size_t size[3] = {sz, sz2, sz3};
11177 size_t work_dimension = 3;
11180 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
11181 work_dimension = 1;
11183 else if(temp_sz > 0){
11185 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 11186 For default multidimensional global work size, leave the global_work_size vector empty, \ 11187 and set multi_dimensional to true. Setting the global work size based on the values inside \ 11188 the global_work_size vector.");
11192 work_dimension = 1;
11194 else if (temp_sz == 2){
11197 work_dimension = 2;
11204 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
11211 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11213 clWaitForEvents(1, &gpuExec);
11215 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11217 clWaitForEvents(1, &gpuExec);
11219 char *result = (
char *) malloc(typesz);
11220 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11222 v->assign(result, result+sz);
11224 clReleaseCommandQueue (queue);
11225 clReleaseMemObject(buffer);
11226 clReleaseMemObject(buffer2);
11227 clReleaseMemObject(buffer3);
11228 clReleaseEvent(gpuExec);
11233 size_t sz = v->size();
11234 size_t sz2 = v2->size();
11235 size_t sz3 = v3.size();
11236 size_t typesz =
sizeof(char) * sz;
11237 size_t typesz2 =
sizeof(double) * sz2;
11238 size_t typesz3 =
sizeof(double) * sz3;
11239 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
11243 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11250 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11254 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11256 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
11258 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
11260 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
11261 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
11262 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
11263 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
11265 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
11267 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
11269 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
11272 size_t size[3] = {sz, sz2, sz3};
11273 size_t work_dimension = 3;
11276 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
11277 work_dimension = 1;
11279 else if(temp_sz > 0){
11281 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 11282 For default multidimensional global work size, leave the global_work_size vector empty, \ 11283 and set multi_dimensional to true. Setting the global work size based on the values inside \ 11284 the global_work_size vector.");
11288 work_dimension = 1;
11290 else if (temp_sz == 2){
11293 work_dimension = 2;
11300 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
11307 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11309 clWaitForEvents(1, &gpuExec);
11311 char *result = (
char *) malloc(typesz);
11312 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11314 v->assign(result, result+sz);
11316 if (typesz2 != typesz or sz != sz2){
11318 result2 = (
double *) malloc(typesz2);
11319 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
11321 v2->assign(result2, result2+sz2);
11325 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
11327 v2->assign(result, result+sz2);
11330 clReleaseCommandQueue (queue);
11331 clReleaseMemObject(buffer);
11332 clReleaseMemObject(buffer2);
11333 clReleaseMemObject(buffer3);
11334 clReleaseEvent(gpuExec);
11339 size_t sz = v->size();
11340 size_t sz2 = v2->size();
11341 size_t sz3 = v3->size();
11342 size_t typesz =
sizeof(char) * sz;
11343 size_t typesz2 =
sizeof(double) * sz2;
11344 size_t typesz3 =
sizeof(double) * sz3;
11345 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
11349 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11356 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11360 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11362 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
11364 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
11366 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
11367 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
11368 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
11369 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
11371 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
11373 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
11375 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
11378 size_t size[3] = {sz, sz2, sz3};
11379 size_t work_dimension = 3;
11382 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
11383 work_dimension = 1;
11385 else if(temp_sz > 0){
11387 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 11388 For default multidimensional global work size, leave the global_work_size vector empty, \ 11389 and set multi_dimensional to true. Setting the global work size based on the values inside \ 11390 the global_work_size vector.");
11394 work_dimension = 1;
11396 else if (temp_sz == 2){
11399 work_dimension = 2;
11406 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
11413 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11415 clWaitForEvents(1, &gpuExec);
11417 char *result = (
char *) malloc(typesz);
11418 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11420 v->assign(result, result+sz);
11422 if (typesz2 != typesz or sz != sz2){
11424 result2 = (
double *) malloc(typesz2);
11425 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
11427 v2->assign(result2, result2+sz2);
11431 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
11433 v2->assign(result, result+sz2);
11436 if (typesz3 != typesz or sz != sz3){
11438 result3 = (
double *) malloc(typesz3);
11439 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
11441 v3->assign(result3, result3+sz3);
11445 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
11447 v3->assign(result, result+sz3);
11450 clReleaseCommandQueue (queue);
11451 clReleaseMemObject(buffer);
11452 clReleaseMemObject(buffer2);
11453 clReleaseMemObject(buffer3);
11454 clReleaseEvent(gpuExec);
11460 size_t sz = v.size();
11461 size_t sz2 = v2.size();
11462 size_t sz3 = v3.size();
11463 size_t typesz =
sizeof(int) * sz;
11464 size_t typesz2 =
sizeof(char) * sz2;
11465 size_t typesz3 =
sizeof(char) * sz3;
11466 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
11470 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11477 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11481 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11483 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
11485 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
11487 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
11488 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
11489 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
11490 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
11492 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
11494 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
11496 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
11499 size_t size[3] = {sz, sz2, sz3};
11500 size_t work_dimension = 3;
11503 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
11504 work_dimension = 1;
11506 else if(temp_sz > 0){
11508 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 11509 For default multidimensional global work size, leave the global_work_size vector empty, \ 11510 and set multi_dimensional to true. Setting the global work size based on the values inside \ 11511 the global_work_size vector.");
11515 work_dimension = 1;
11517 else if (temp_sz == 2){
11520 work_dimension = 2;
11527 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
11534 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11536 clWaitForEvents(1, &gpuExec);
11538 int *result = (
int *) malloc(typesz);
11539 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11541 std::vector<int> res = std::vector<int>();
11542 res.assign(result, result+sz);
11544 clReleaseCommandQueue (queue);
11545 clReleaseMemObject(buffer);
11546 clReleaseMemObject(buffer2);
11547 clReleaseMemObject(buffer3);
11548 clReleaseEvent(gpuExec);
11555 size_t sz = v->size();
11556 size_t sz2 = v2.size();
11557 size_t sz3 = v3.size();
11558 size_t typesz =
sizeof(int) * sz;
11559 size_t typesz2 =
sizeof(char) * sz2;
11560 size_t typesz3 =
sizeof(char) * sz3;
11561 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
11565 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11572 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
11576 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11578 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
11580 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
11582 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
11583 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
11584 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
11585 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
11587 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
11589 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
11591 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
11594 size_t size[3] = {sz, sz2, sz3};
11595 size_t work_dimension = 3;
11598 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
11599 work_dimension = 1;
11601 else if(temp_sz > 0){
11603 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 11604 For default multidimensional global work size, leave the global_work_size vector empty, \ 11605 and set multi_dimensional to true. Setting the global work size based on the values inside \ 11606 the global_work_size vector.");
11610 work_dimension = 1;
11612 else if (temp_sz == 2){
11615 work_dimension = 2;
11622 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
11629 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11631 clWaitForEvents(1, &gpuExec);
11633 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11635 clWaitForEvents(1, &gpuExec);
11637 int *result = (
int *) malloc(typesz);
11638 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11640 v->assign(result, result+sz);
11642 clReleaseCommandQueue (queue);
11643 clReleaseMemObject(buffer);
11644 clReleaseMemObject(buffer2);
11645 clReleaseMemObject(buffer3);
11646 clReleaseEvent(gpuExec);
// Listing of one kernel-launch routine body (embedded original line numbers
// 11651-11752). NOTE(review): the signature, braces, `else` lines and several
// statements fall in the numbering gaps and are absent from this excerpt, so
// the nesting described below is inferred from the visible statements only.
//
// Visible flow: size three device buffers from v, v2 and v3, bind them as
// kernel arguments 0-2, upload the host data, run the kernel once, then copy
// `buffer` back into *v and `buffer2` back into *v2.
11651 size_t sz = v->size();
11652 size_t sz2 = v2->size();
11653 size_t sz3 = v3.size();
// Byte sizes: v is sized as int, v2 and v3 as char.
11654 size_t typesz =
sizeof(int) * sz;
11655 size_t typesz2 =
sizeof(char) * sz2;
11656 size_t typesz3 =
sizeof(char) * sz3;
// Number of caller-supplied buffer sizes; 0 when params is NULL.
11657 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short.
// NOTE(review): the conditions guarding them are not visible here.
11661 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11668 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device allocations. NOTE(review): `error` is overwritten by every call and
// no check of it is visible in this excerpt.
11672 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11674 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
11676 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2 (return codes are discarded).
11678 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
11679 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
11680 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
11681 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device uploads of the three inputs.
11683 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
11685 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
11687 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one extent per input length, three dimensions.
11690 size_t size[3] = {sz, sz2, sz3};
11691 size_t work_dimension = 3;
// Work-dimension selection. NOTE(review): the statements between these
// branches (presumably filling size[] from params) are missing from the excerpt.
11694 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
11695 work_dimension = 1;
11697 else if(temp_sz > 0){
11699 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 11700 For default multidimensional global work size, leave the global_work_size vector empty, \ 11701 and set multi_dimensional to true. Setting the global work size based on the values inside \ 11702 the global_work_size vector.");
11706 work_dimension = 1;
11708 else if (temp_sz == 2){
11711 work_dimension = 2;
11718 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel, then block until it has finished.
11725 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11727 clWaitForEvents(1, &gpuExec);
// Read `buffer` back through a temporary int array and copy it into *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
11729 int *result = (
int *) malloc(typesz);
11730 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11732 v->assign(result, result+sz);
// Read `buffer2` back into *v2. A dedicated char array is used when its byte
// size or element count differs from v's.
11734 if (typesz2 != typesz or sz != sz2){
11736 result2 = (
char *) malloc(typesz2);
11737 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
11739 v2->assign(result2, result2+sz2);
// Otherwise (presumably the else-branch; the `else` line is not in this
// excerpt) the int scratch array is reused. It holds packed chars at this
// point, so it is walked as char*: iterating it as int* converts whole ints and
// keeps only one byte out of every sizeof(int).
11743 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
11745 v2->assign(reinterpret_cast<char *>(result), reinterpret_cast<char *>(result)+sz2);
// Release the OpenCL handles created above.
11748 clReleaseCommandQueue (queue);
11749 clReleaseMemObject(buffer);
11750 clReleaseMemObject(buffer2);
11751 clReleaseMemObject(buffer3);
11752 clReleaseEvent(gpuExec);
// Listing of one kernel-launch routine body (embedded original line numbers
// 11757-11872). NOTE(review): the signature, braces, `else` lines and several
// statements fall in the numbering gaps and are absent from this excerpt, so
// the nesting described below is inferred from the visible statements only.
//
// Visible flow: size three device buffers from v, v2 and v3, bind them as
// kernel arguments 0-2, upload the host data, run the kernel once, then copy
// all three buffers back into *v, *v2 and *v3.
11757 size_t sz = v->size();
11758 size_t sz2 = v2->size();
11759 size_t sz3 = v3->size();
// Byte sizes: v is sized as int, v2 and v3 as char.
11760 size_t typesz =
sizeof(int) * sz;
11761 size_t typesz2 =
sizeof(char) * sz2;
11762 size_t typesz3 =
sizeof(char) * sz3;
// Number of caller-supplied buffer sizes; 0 when params is NULL.
11763 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short.
// NOTE(review): the conditions guarding them are not visible here.
11767 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11774 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device allocations. NOTE(review): `error` is overwritten by every call and
// no check of it is visible in this excerpt.
11778 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11780 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
11782 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2 (return codes are discarded).
11784 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
11785 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
11786 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
11787 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device uploads of the three inputs.
11789 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
11791 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
11793 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Default global work size: one extent per input length, three dimensions.
11796 size_t size[3] = {sz, sz2, sz3};
11797 size_t work_dimension = 3;
// Work-dimension selection. NOTE(review): the statements between these
// branches (presumably filling size[] from params) are missing from the excerpt.
11800 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
11801 work_dimension = 1;
11803 else if(temp_sz > 0){
11805 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 11806 For default multidimensional global work size, leave the global_work_size vector empty, \ 11807 and set multi_dimensional to true. Setting the global work size based on the values inside \ 11808 the global_work_size vector.");
11812 work_dimension = 1;
11814 else if (temp_sz == 2){
11817 work_dimension = 2;
11824 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel, then block until it has finished.
11831 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11833 clWaitForEvents(1, &gpuExec);
// Read `buffer` back through a temporary int array and copy it into *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
11835 int *result = (
int *) malloc(typesz);
11836 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
11838 v->assign(result, result+sz);
// Read `buffer2` back into *v2. A dedicated char array is used when its byte
// size or element count differs from v's.
11840 if (typesz2 != typesz or sz != sz2){
11842 result2 = (
char *) malloc(typesz2);
11843 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
11845 v2->assign(result2, result2+sz2);
// Otherwise (presumably the else-branch; the `else` line is not in this
// excerpt) the int scratch array is reused. It holds packed chars at this
// point, so it is walked as char*: iterating it as int* converts whole ints and
// keeps only one byte out of every sizeof(int).
11849 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
11851 v2->assign(reinterpret_cast<char *>(result), reinterpret_cast<char *>(result)+sz2);
// Read `buffer3` back into *v3, again with a dedicated char array when the
// sizes differ from v's.
11854 if (typesz3 != typesz or sz != sz3){
11856 result3 = (
char *) malloc(typesz3);
11857 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
11859 v3->assign(result3, result3+sz3);
// Fallback for *v3 (presumably the else-branch). The source of the read must be
// buffer3/typesz3: reading buffer2 here would hand *v3 a copy of the second
// argument's data. typesz3 == typesz in this branch, so `result` is large
// enough; the bytes are packed chars and are therefore walked as char*.
11863 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
11865 v3->assign(reinterpret_cast<char *>(result), reinterpret_cast<char *>(result)+sz3);
// Release the OpenCL handles created above.
11868 clReleaseCommandQueue (queue);
11869 clReleaseMemObject(buffer);
11870 clReleaseMemObject(buffer2);
11871 clReleaseMemObject(buffer3);
11872 clReleaseEvent(gpuExec);
// Listing of one kernel-launch routine body (embedded original line numbers
// 11878-11966). NOTE(review): the signature, braces, `else` lines and several
// statements fall in the numbering gaps and are absent from this excerpt, so
// the nesting described below is inferred from the visible statements only.
//
// Visible flow: size three device buffers from v, v2 and v3, bind them as
// kernel arguments 0-2, upload the host data, run the kernel once, then copy
// `buffer` into a local std::vector<int> named res.
11878 size_t sz = v.size();
11879 size_t sz2 = v2.size();
11880 size_t sz3 = v3.size();
// Byte sizes: v is sized as int, v2 as char, v3 as int.
11881 size_t typesz =
sizeof(int) * sz;
11882 size_t typesz2 =
sizeof(char) * sz2;
11883 size_t typesz3 =
sizeof(int) * sz3;
// Number of caller-supplied buffer sizes; 0 when params is NULL.
11884 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short.
// NOTE(review): the conditions guarding them are not visible here.
11888 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11895 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device allocations. NOTE(review): `error` is overwritten by every call and
// no check of it is visible in this excerpt.
11899 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11901 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
11903 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2 (return codes are discarded).
11905 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
11906 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
11907 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
11908 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device uploads of the three inputs.
11910 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
11912 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
11914 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one extent per input length, three dimensions.
11917 size_t size[3] = {sz, sz2, sz3};
11918 size_t work_dimension = 3;
// Work-dimension selection. NOTE(review): the statements between these
// branches (presumably filling size[] from params) are missing from the excerpt.
11921 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
11922 work_dimension = 1;
11924 else if(temp_sz > 0){
11926 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 11927 For default multidimensional global work size, leave the global_work_size vector empty, \ 11928 and set multi_dimensional to true. Setting the global work size based on the values inside \ 11929 the global_work_size vector.");
11933 work_dimension = 1;
11935 else if (temp_sz == 2){
11938 work_dimension = 2;
11945 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel, then block until it has finished.
11952 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
11954 clWaitForEvents(1, &gpuExec);
// Read `buffer` back through a temporary int array.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
11956 int *result = (
int *) malloc(typesz);
11957 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the sz ints into the local result vector (how res leaves the routine is
// not visible in this excerpt).
11959 std::vector<int> res = std::vector<int>();
11960 res.assign(result, result+sz);
// Release the OpenCL handles created above.
11962 clReleaseCommandQueue (queue);
11963 clReleaseMemObject(buffer);
11964 clReleaseMemObject(buffer2);
11965 clReleaseMemObject(buffer3);
11966 clReleaseEvent(gpuExec);
// Listing of one kernel-launch routine body (embedded original line numbers
// 11973-12064). NOTE(review): the signature, braces, `else` lines and several
// statements fall in the numbering gaps and are absent from this excerpt, so
// the nesting described below is inferred from the visible statements only.
//
// Visible flow: size three device buffers from v, v2 and v3, bind them as
// kernel arguments 0-2, upload the host data, enqueue the kernel, then copy
// `buffer` back into *v.
11973 size_t sz = v->size();
11974 size_t sz2 = v2.size();
11975 size_t sz3 = v3.size();
// Byte sizes: v is sized as int, v2 as char, v3 as int.
11976 size_t typesz =
sizeof(int) * sz;
11977 size_t typesz2 =
sizeof(char) * sz2;
11978 size_t typesz3 =
sizeof(int) * sz3;
// Number of caller-supplied buffer sizes; 0 when params is NULL.
11979 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short.
// NOTE(review): the conditions guarding them are not visible here.
11983 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
11990 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device allocations. NOTE(review): `error` is overwritten by every call and
// no check of it is visible in this excerpt.
11994 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
11996 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
11998 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2 (return codes are discarded).
12000 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
12001 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
12002 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
12003 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device uploads of the three inputs.
12005 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12007 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
12009 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one extent per input length, three dimensions.
12012 size_t size[3] = {sz, sz2, sz3};
12013 size_t work_dimension = 3;
// Work-dimension selection. NOTE(review): the statements between these
// branches (presumably filling size[] from params) are missing from the excerpt.
12016 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
12017 work_dimension = 1;
12019 else if(temp_sz > 0){
12021 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 12022 For default multidimensional global work size, leave the global_work_size vector empty, \ 12023 and set multi_dimensional to true. Setting the global work size based on the values inside \ 12024 the global_work_size vector.");
12028 work_dimension = 1;
12030 else if (temp_sz == 2){
12033 work_dimension = 2;
12040 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until it has finished.
12047 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12049 clWaitForEvents(1, &gpuExec);
// NOTE(review): the same launch is issued a second time with identical
// arguments. It overwrites gpuExec, so the first event handle is never passed
// to clReleaseEvent in the visible code. Confirm whether running the kernel
// twice is intentional or a copy-paste duplicate.
12051 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12053 clWaitForEvents(1, &gpuExec);
// Read `buffer` back through a temporary int array and copy it into *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
12055 int *result = (
int *) malloc(typesz);
12056 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12058 v->assign(result, result+sz);
// Release the OpenCL handles created above.
12060 clReleaseCommandQueue (queue);
12061 clReleaseMemObject(buffer);
12062 clReleaseMemObject(buffer2);
12063 clReleaseMemObject(buffer3);
12064 clReleaseEvent(gpuExec);
// Listing of one kernel-launch routine body (embedded original line numbers
// 12069-12170). NOTE(review): the signature, braces, `else` lines and several
// statements fall in the numbering gaps and are absent from this excerpt, so
// the nesting described below is inferred from the visible statements only.
//
// Visible flow: size three device buffers from v, v2 and v3, bind them as
// kernel arguments 0-2, upload the host data, run the kernel once, then copy
// `buffer` back into *v and `buffer2` back into *v2.
12069 size_t sz = v->size();
12070 size_t sz2 = v2->size();
12071 size_t sz3 = v3.size();
// Byte sizes: v is sized as int, v2 as char, v3 as int.
12072 size_t typesz =
sizeof(int) * sz;
12073 size_t typesz2 =
sizeof(char) * sz2;
12074 size_t typesz3 =
sizeof(int) * sz3;
// Number of caller-supplied buffer sizes; 0 when params is NULL.
12075 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short.
// NOTE(review): the conditions guarding them are not visible here.
12079 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12086 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device allocations. NOTE(review): `error` is overwritten by every call and
// no check of it is visible in this excerpt.
12090 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12092 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
12094 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2 (return codes are discarded).
12096 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
12097 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
12098 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
12099 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device uploads of the three inputs.
12101 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12103 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
12105 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one extent per input length, three dimensions.
12108 size_t size[3] = {sz, sz2, sz3};
12109 size_t work_dimension = 3;
// Work-dimension selection. NOTE(review): the statements between these
// branches (presumably filling size[] from params) are missing from the excerpt.
12112 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
12113 work_dimension = 1;
12115 else if(temp_sz > 0){
12117 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 12118 For default multidimensional global work size, leave the global_work_size vector empty, \ 12119 and set multi_dimensional to true. Setting the global work size based on the values inside \ 12120 the global_work_size vector.");
12124 work_dimension = 1;
12126 else if (temp_sz == 2){
12129 work_dimension = 2;
12136 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel, then block until it has finished.
12143 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12145 clWaitForEvents(1, &gpuExec);
// Read `buffer` back through a temporary int array and copy it into *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
12147 int *result = (
int *) malloc(typesz);
12148 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12150 v->assign(result, result+sz);
// Read `buffer2` back into *v2. A dedicated char array is used when its byte
// size or element count differs from v's.
12152 if (typesz2 != typesz or sz != sz2){
12154 result2 = (
char *) malloc(typesz2);
12155 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
12157 v2->assign(result2, result2+sz2);
// Otherwise (presumably the else-branch; the `else` line is not in this
// excerpt) the int scratch array is reused. It holds packed chars at this
// point, so it is walked as char*: iterating it as int* converts whole ints and
// keeps only one byte out of every sizeof(int).
12161 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
12163 v2->assign(reinterpret_cast<char *>(result), reinterpret_cast<char *>(result)+sz2);
// Release the OpenCL handles created above.
12166 clReleaseCommandQueue (queue);
12167 clReleaseMemObject(buffer);
12168 clReleaseMemObject(buffer2);
12169 clReleaseMemObject(buffer3);
12170 clReleaseEvent(gpuExec);
// Listing of one kernel-launch routine body (embedded original line numbers
// 12175-12290). NOTE(review): the signature, braces, `else` lines and several
// statements fall in the numbering gaps and are absent from this excerpt, so
// the nesting described below is inferred from the visible statements only.
//
// Visible flow: size three device buffers from v, v2 and v3, bind them as
// kernel arguments 0-2, upload the host data, run the kernel once, then copy
// all three buffers back into *v, *v2 and *v3.
12175 size_t sz = v->size();
12176 size_t sz2 = v2->size();
12177 size_t sz3 = v3->size();
// Byte sizes: v is sized as int, v2 as char, v3 as int.
12178 size_t typesz =
sizeof(int) * sz;
12179 size_t typesz2 =
sizeof(char) * sz2;
12180 size_t typesz3 =
sizeof(int) * sz3;
// Number of caller-supplied buffer sizes; 0 when params is NULL.
12181 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short.
// NOTE(review): the conditions guarding them are not visible here.
12185 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12192 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device allocations. NOTE(review): `error` is overwritten by every call and
// no check of it is visible in this excerpt.
12196 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12198 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
12200 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2 (return codes are discarded).
12202 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
12203 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
12204 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
12205 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device uploads of the three inputs.
12207 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12209 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
12211 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Default global work size: one extent per input length, three dimensions.
12214 size_t size[3] = {sz, sz2, sz3};
12215 size_t work_dimension = 3;
// Work-dimension selection. NOTE(review): the statements between these
// branches (presumably filling size[] from params) are missing from the excerpt.
12218 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
12219 work_dimension = 1;
12221 else if(temp_sz > 0){
12223 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 12224 For default multidimensional global work size, leave the global_work_size vector empty, \ 12225 and set multi_dimensional to true. Setting the global work size based on the values inside \ 12226 the global_work_size vector.");
12230 work_dimension = 1;
12232 else if (temp_sz == 2){
12235 work_dimension = 2;
12242 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel, then block until it has finished.
12249 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12251 clWaitForEvents(1, &gpuExec);
// Read `buffer` back through a temporary int array and copy it into *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
12253 int *result = (
int *) malloc(typesz);
12254 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12256 v->assign(result, result+sz);
// Read `buffer2` back into *v2. A dedicated char array is used when its byte
// size or element count differs from v's.
12258 if (typesz2 != typesz or sz != sz2){
12260 result2 = (
char *) malloc(typesz2);
12261 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
12263 v2->assign(result2, result2+sz2);
// Otherwise (presumably the else-branch; the `else` line is not in this
// excerpt) the int scratch array is reused. It holds packed chars at this
// point, so it is walked as char*: iterating it as int* converts whole ints and
// keeps only one byte out of every sizeof(int).
12267 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
12269 v2->assign(reinterpret_cast<char *>(result), reinterpret_cast<char *>(result)+sz2);
// Read `buffer3` back into *v3, with a dedicated int array when the sizes
// differ from v's.
12272 if (typesz3 != typesz or sz != sz3){
12274 result3 = (
int *) malloc(typesz3);
12275 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
12277 v3->assign(result3, result3+sz3);
// Fallback for *v3 (presumably the else-branch), taken when v and v3 have the
// same length. The source of the read must be buffer3/typesz3: reading
// buffer2/typesz2 here would hand *v3 the second argument's bytes and could
// write past `result` when typesz2 > typesz. typesz3 == typesz in this branch,
// so the int scratch array is exactly large enough.
12281 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
12283 v3->assign(result, result+sz3);
// Release the OpenCL handles created above.
12286 clReleaseCommandQueue (queue);
12287 clReleaseMemObject(buffer);
12288 clReleaseMemObject(buffer2);
12289 clReleaseMemObject(buffer3);
12290 clReleaseEvent(gpuExec);
// Listing of one kernel-launch routine body (embedded original line numbers
// 12296-12384). NOTE(review): the signature, braces, `else` lines and several
// statements fall in the numbering gaps and are absent from this excerpt, so
// the nesting described below is inferred from the visible statements only.
//
// Visible flow: size three device buffers from v, v2 and v3, bind them as
// kernel arguments 0-2, upload the host data, run the kernel once, then copy
// `buffer` into a local std::vector<int> named res.
12296 size_t sz = v.size();
12297 size_t sz2 = v2.size();
12298 size_t sz3 = v3.size();
// Byte sizes: v is sized as int, v2 as char, v3 as float.
12299 size_t typesz =
sizeof(int) * sz;
12300 size_t typesz2 =
sizeof(char) * sz2;
12301 size_t typesz3 =
sizeof(float) * sz3;
// Number of caller-supplied buffer sizes; 0 when params is NULL.
12302 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short.
// NOTE(review): the conditions guarding them are not visible here.
12306 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12313 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device allocations. NOTE(review): `error` is overwritten by every call and
// no check of it is visible in this excerpt.
12317 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12319 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
12321 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2 (return codes are discarded).
12323 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
12324 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
12325 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
12326 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device uploads of the three inputs.
12328 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
12330 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
12332 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one extent per input length, three dimensions.
12335 size_t size[3] = {sz, sz2, sz3};
12336 size_t work_dimension = 3;
// Work-dimension selection. NOTE(review): the statements between these
// branches (presumably filling size[] from params) are missing from the excerpt.
12339 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
12340 work_dimension = 1;
12342 else if(temp_sz > 0){
12344 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 12345 For default multidimensional global work size, leave the global_work_size vector empty, \ 12346 and set multi_dimensional to true. Setting the global work size based on the values inside \ 12347 the global_work_size vector.");
12351 work_dimension = 1;
12353 else if (temp_sz == 2){
12356 work_dimension = 2;
12363 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel, then block until it has finished.
12370 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12372 clWaitForEvents(1, &gpuExec);
// Read `buffer` back through a temporary int array.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
12374 int *result = (
int *) malloc(typesz);
12375 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the sz ints into the local result vector (how res leaves the routine is
// not visible in this excerpt).
12377 std::vector<int> res = std::vector<int>();
12378 res.assign(result, result+sz);
// Release the OpenCL handles created above.
12380 clReleaseCommandQueue (queue);
12381 clReleaseMemObject(buffer);
12382 clReleaseMemObject(buffer2);
12383 clReleaseMemObject(buffer3);
12384 clReleaseEvent(gpuExec);
// Listing of one kernel-launch routine body (embedded original line numbers
// 12391-12482). NOTE(review): the signature, braces, `else` lines and several
// statements fall in the numbering gaps and are absent from this excerpt, so
// the nesting described below is inferred from the visible statements only.
//
// Visible flow: size three device buffers from v, v2 and v3, bind them as
// kernel arguments 0-2, upload the host data, enqueue the kernel, then copy
// `buffer` back into *v.
12391 size_t sz = v->size();
12392 size_t sz2 = v2.size();
12393 size_t sz3 = v3.size();
// Byte sizes: v is sized as int, v2 as char, v3 as float.
12394 size_t typesz =
sizeof(int) * sz;
12395 size_t typesz2 =
sizeof(char) * sz2;
12396 size_t typesz3 =
sizeof(float) * sz3;
// Number of caller-supplied buffer sizes; 0 when params is NULL.
12397 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short.
// NOTE(review): the conditions guarding them are not visible here.
12401 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12408 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device allocations. NOTE(review): `error` is overwritten by every call and
// no check of it is visible in this excerpt.
12412 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12414 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
12416 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2 (return codes are discarded).
12418 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
12419 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
12420 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
12421 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device uploads of the three inputs.
12423 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12425 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
12427 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one extent per input length, three dimensions.
12430 size_t size[3] = {sz, sz2, sz3};
12431 size_t work_dimension = 3;
// Work-dimension selection. NOTE(review): the statements between these
// branches (presumably filling size[] from params) are missing from the excerpt.
12434 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
12435 work_dimension = 1;
12437 else if(temp_sz > 0){
12439 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 12440 For default multidimensional global work size, leave the global_work_size vector empty, \ 12441 and set multi_dimensional to true. Setting the global work size based on the values inside \ 12442 the global_work_size vector.");
12446 work_dimension = 1;
12448 else if (temp_sz == 2){
12451 work_dimension = 2;
12458 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until it has finished.
12465 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12467 clWaitForEvents(1, &gpuExec);
// NOTE(review): the same launch is issued a second time with identical
// arguments. It overwrites gpuExec, so the first event handle is never passed
// to clReleaseEvent in the visible code. Confirm whether running the kernel
// twice is intentional or a copy-paste duplicate.
12469 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12471 clWaitForEvents(1, &gpuExec);
// Read `buffer` back through a temporary int array and copy it into *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
12473 int *result = (
int *) malloc(typesz);
12474 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12476 v->assign(result, result+sz);
// Release the OpenCL handles created above.
12478 clReleaseCommandQueue (queue);
12479 clReleaseMemObject(buffer);
12480 clReleaseMemObject(buffer2);
12481 clReleaseMemObject(buffer3);
12482 clReleaseEvent(gpuExec);
// Listing of one kernel-launch routine body (embedded original line numbers
// 12487-12588). NOTE(review): the signature, braces, `else` lines and several
// statements fall in the numbering gaps and are absent from this excerpt, so
// the nesting described below is inferred from the visible statements only.
//
// Visible flow: size three device buffers from v, v2 and v3, bind them as
// kernel arguments 0-2, upload the host data, run the kernel once, then copy
// `buffer` back into *v and `buffer2` back into *v2.
12487 size_t sz = v->size();
12488 size_t sz2 = v2->size();
12489 size_t sz3 = v3.size();
// Byte sizes: v is sized as int, v2 as char, v3 as float.
12490 size_t typesz =
sizeof(int) * sz;
12491 size_t typesz2 =
sizeof(char) * sz2;
12492 size_t typesz3 =
sizeof(float) * sz3;
// Number of caller-supplied buffer sizes; 0 when params is NULL.
12493 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short.
// NOTE(review): the conditions guarding them are not visible here.
12497 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12504 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device allocations. NOTE(review): `error` is overwritten by every call and
// no check of it is visible in this excerpt.
12508 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12510 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
12512 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2 (return codes are discarded).
12514 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
12515 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
12516 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
12517 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device uploads of the three inputs.
12519 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12521 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
12523 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one extent per input length, three dimensions.
12526 size_t size[3] = {sz, sz2, sz3};
12527 size_t work_dimension = 3;
// Work-dimension selection. NOTE(review): the statements between these
// branches (presumably filling size[] from params) are missing from the excerpt.
12530 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
12531 work_dimension = 1;
12533 else if(temp_sz > 0){
12535 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 12536 For default multidimensional global work size, leave the global_work_size vector empty, \ 12537 and set multi_dimensional to true. Setting the global work size based on the values inside \ 12538 the global_work_size vector.");
12542 work_dimension = 1;
12544 else if (temp_sz == 2){
12547 work_dimension = 2;
12554 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel, then block until it has finished.
12561 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12563 clWaitForEvents(1, &gpuExec);
// Read `buffer` back through a temporary int array and copy it into *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
12565 int *result = (
int *) malloc(typesz);
12566 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12568 v->assign(result, result+sz);
// Read `buffer2` back into *v2. A dedicated char array is used when its byte
// size or element count differs from v's.
12570 if (typesz2 != typesz or sz != sz2){
12572 result2 = (
char *) malloc(typesz2);
12573 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
12575 v2->assign(result2, result2+sz2);
// Otherwise (presumably the else-branch; the `else` line is not in this
// excerpt) the int scratch array is reused. It holds packed chars at this
// point, so it is walked as char*: iterating it as int* converts whole ints and
// keeps only one byte out of every sizeof(int).
12579 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
12581 v2->assign(reinterpret_cast<char *>(result), reinterpret_cast<char *>(result)+sz2);
// Release the OpenCL handles created above.
12584 clReleaseCommandQueue (queue);
12585 clReleaseMemObject(buffer);
12586 clReleaseMemObject(buffer2);
12587 clReleaseMemObject(buffer3);
12588 clReleaseEvent(gpuExec);
// Listing of one kernel-launch routine body (embedded original line numbers
// 12593-12708). NOTE(review): the signature, braces, `else` lines and several
// statements fall in the numbering gaps and are absent from this excerpt, so
// the nesting described below is inferred from the visible statements only.
//
// Visible flow: size three device buffers from v, v2 and v3, bind them as
// kernel arguments 0-2, upload the host data, run the kernel once, then copy
// all three buffers back into *v, *v2 and *v3.
12593 size_t sz = v->size();
12594 size_t sz2 = v2->size();
12595 size_t sz3 = v3->size();
// Byte sizes: v is sized as int, v2 as char, v3 as float.
12596 size_t typesz =
sizeof(int) * sz;
12597 size_t typesz2 =
sizeof(char) * sz2;
12598 size_t typesz3 =
sizeof(float) * sz3;
// Number of caller-supplied buffer sizes; 0 when params is NULL.
12599 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short.
// NOTE(review): the conditions guarding them are not visible here.
12603 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12610 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device allocations. NOTE(review): `error` is overwritten by every call and
// no check of it is visible in this excerpt.
12614 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12616 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
12618 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2 (return codes are discarded).
12620 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
12621 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
12622 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
12623 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device uploads of the three inputs.
12625 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12627 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
12629 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Default global work size: one extent per input length, three dimensions.
12632 size_t size[3] = {sz, sz2, sz3};
12633 size_t work_dimension = 3;
// Work-dimension selection. NOTE(review): the statements between these
// branches (presumably filling size[] from params) are missing from the excerpt.
12636 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
12637 work_dimension = 1;
12639 else if(temp_sz > 0){
12641 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 12642 For default multidimensional global work size, leave the global_work_size vector empty, \ 12643 and set multi_dimensional to true. Setting the global work size based on the values inside \ 12644 the global_work_size vector.");
12648 work_dimension = 1;
12650 else if (temp_sz == 2){
12653 work_dimension = 2;
12660 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel, then block until it has finished.
12667 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12669 clWaitForEvents(1, &gpuExec);
// Read `buffer` back through a temporary int array and copy it into *v.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
12671 int *result = (
int *) malloc(typesz);
12672 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12674 v->assign(result, result+sz);
// Read `buffer2` back into *v2. A dedicated char array is used when its byte
// size or element count differs from v's.
12676 if (typesz2 != typesz or sz != sz2){
12678 result2 = (
char *) malloc(typesz2);
12679 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
12681 v2->assign(result2, result2+sz2);
// Otherwise (presumably the else-branch; the `else` line is not in this
// excerpt) the int scratch array is reused. It holds packed chars at this
// point, so it is walked as char*: iterating it as int* converts whole ints and
// keeps only one byte out of every sizeof(int).
12685 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
12687 v2->assign(reinterpret_cast<char *>(result), reinterpret_cast<char *>(result)+sz2);
// Read `buffer3` back into *v3, with a dedicated float array when the sizes
// differ from v's.
12690 if (typesz3 != typesz or sz != sz3){
12692 result3 = (
float *) malloc(typesz3);
12693 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
12695 v3->assign(result3, result3+sz3);
// Fallback for *v3 (presumably the else-branch), taken when v and v3 have the
// same length and byte size. The source of the read must be buffer3/typesz3:
// reading buffer2/typesz2 here would hand *v3 the second argument's bytes and
// could write past `result` when typesz2 > typesz. The scratch array then holds
// float bit patterns, so it is walked as float* rather than converting ints.
12699 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
12701 v3->assign(reinterpret_cast<float *>(result), reinterpret_cast<float *>(result)+sz3);
// Release the OpenCL handles created above.
12704 clReleaseCommandQueue (queue);
12705 clReleaseMemObject(buffer);
12706 clReleaseMemObject(buffer2);
12707 clReleaseMemObject(buffer3);
12708 clReleaseEvent(gpuExec);
// Listing of one kernel-launch routine body (embedded original line numbers
// 12714-12802). NOTE(review): the signature, braces, `else` lines and several
// statements fall in the numbering gaps and are absent from this excerpt, so
// the nesting described below is inferred from the visible statements only.
//
// Visible flow: size three device buffers from v, v2 and v3, bind them as
// kernel arguments 0-2, upload the host data, run the kernel once, then copy
// `buffer` into a local std::vector<int> named res.
12714 size_t sz = v.size();
12715 size_t sz2 = v2.size();
12716 size_t sz3 = v3.size();
// Byte sizes: v is sized as int, v2 as char, v3 as double.
12717 size_t typesz =
sizeof(int) * sz;
12718 size_t typesz2 =
sizeof(char) * sz2;
12719 size_t typesz3 =
sizeof(double) * sz3;
// Number of caller-supplied buffer sizes; 0 when params is NULL.
12720 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short.
// NOTE(review): the conditions guarding them are not visible here.
12724 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12731 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device allocations. NOTE(review): `error` is overwritten by every call and
// no check of it is visible in this excerpt.
12735 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12737 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
12739 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2 (return codes are discarded).
12741 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
12742 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
12743 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
12744 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) host-to-device uploads of the three inputs.
12746 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
12748 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
12750 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one extent per input length, three dimensions.
12753 size_t size[3] = {sz, sz2, sz3};
12754 size_t work_dimension = 3;
// Work-dimension selection. NOTE(review): the statements between these
// branches (presumably filling size[] from params) are missing from the excerpt.
12757 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
12758 work_dimension = 1;
12760 else if(temp_sz > 0){
12762 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 12763 For default multidimensional global work size, leave the global_work_size vector empty, \ 12764 and set multi_dimensional to true. Setting the global work size based on the values inside \ 12765 the global_work_size vector.");
12769 work_dimension = 1;
12771 else if (temp_sz == 2){
12774 work_dimension = 2;
12781 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel, then block until it has finished.
12788 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12790 clWaitForEvents(1, &gpuExec);
// Read `buffer` back through a temporary int array.
// NOTE(review): no free(result) is visible in this excerpt - confirm it is released.
12792 int *result = (
int *) malloc(typesz);
12793 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the sz ints into the local result vector (how res leaves the routine is
// not visible in this excerpt).
12795 std::vector<int> res = std::vector<int>();
12796 res.assign(result, result+sz);
// Release the OpenCL handles created above.
12798 clReleaseCommandQueue (queue);
12799 clReleaseMemObject(buffer);
12800 clReleaseMemObject(buffer2);
12801 clReleaseMemObject(buffer3);
12802 clReleaseEvent(gpuExec);
// Fragment of a three-buffer OpenCL kernel launch (int / char / double host vectors).
// NOTE(review): the enclosing function signature, braces and several branch lines are
// absent from this listing (see the gaps in the embedded line numbers), so only the
// statements shown here are described.
// Element counts and byte sizes of the three host vectors.
12809 size_t sz = v->size();
12810 size_t sz2 = v2.size();
12811 size_t sz3 = v3.size();
12812 size_t typesz =
sizeof(int) * sz;
12813 size_t typesz2 =
sizeof(char) * sz2;
12814 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
12815 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the buffers_size entry count; their guarding conditions are not visible here.
12819 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12826 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector.
// NOTE(review): every call overwrites `error` and no check is visible in between.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY although host data is uploaded
// into them below - confirm the kernel never reads them.
12830 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12832 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
12834 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2.
12836 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
12837 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
12838 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
12839 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
// NOTE(review): these return codes are not passed to checkError, unlike the read below.
12841 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12843 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
12845 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size candidates; work_dimension selects how many entries the launch uses.
12848 size_t size[3] = {sz, sz2, sz3};
12849 size_t work_dimension = 3;
// 1-D launch when params is NULL, or when multi_dimensional is false and temp_sz is 0.
12852 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
12853 work_dimension = 1;
// NOTE(review): the remaining branches presumably pick the dimension from an explicit
// global_work_size; the lines that assign size[] are missing from this listing.
12855 else if(temp_sz > 0){
12857 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 12858 For default multidimensional global work size, leave the global_work_size vector empty, \ 12859 and set multi_dimensional to true. Setting the global work size based on the values inside \ 12860 the global_work_size vector.");
12864 work_dimension = 1;
12866 else if (temp_sz == 2){
12869 work_dimension = 2;
12876 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
12883 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12885 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and waited on a second time; gpuExec is overwritten,
// so the first event is not covered by the single clReleaseEvent below - confirm intent.
12887 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12889 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a temporary host array, then copy into *v.
// NOTE(review): no free(result) appears in the visible lines - confirm it is released.
12891 int *result = (
int *) malloc(typesz);
12892 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12894 v->assign(result, result+sz);
// Release the OpenCL objects created above.
12896 clReleaseCommandQueue (queue);
12897 clReleaseMemObject(buffer);
12898 clReleaseMemObject(buffer2);
12899 clReleaseMemObject(buffer3);
12900 clReleaseEvent(gpuExec);
// Fragment of a three-buffer OpenCL kernel launch (int / char / double host vectors)
// that reads the first two buffers back into *v and *v2.
// NOTE(review): the enclosing function signature, braces and several branch lines are
// absent from this listing (see the gaps in the embedded line numbers), so only the
// statements shown here are described.
// Element counts and byte sizes of the three host vectors.
12905 size_t sz = v->size();
12906 size_t sz2 = v2->size();
12907 size_t sz3 = v3.size();
12908 size_t typesz =
sizeof(int) * sz;
12909 size_t typesz2 =
sizeof(char) * sz2;
12910 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
12911 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the buffers_size entry count; their guarding conditions are not visible here.
12915 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
12922 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector.
// NOTE(review): every call overwrites `error` and no check is visible in between.
// NOTE(review): buffer3 is CL_MEM_WRITE_ONLY although host data is uploaded into it
// below - confirm the kernel never reads it.
12926 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
12928 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
12930 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2.
12932 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
12933 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
12934 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
12935 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
// NOTE(review): these return codes are not passed to checkError, unlike the reads below.
12937 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
12939 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
12941 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size candidates; work_dimension selects how many entries the launch uses.
12944 size_t size[3] = {sz, sz2, sz3};
12945 size_t work_dimension = 3;
// 1-D launch when params is NULL, or when multi_dimensional is false and temp_sz is 0.
12948 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
12949 work_dimension = 1;
// NOTE(review): the remaining branches presumably pick the dimension from an explicit
// global_work_size; the lines that assign size[] are missing from this listing.
12951 else if(temp_sz > 0){
12953 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 12954 For default multidimensional global work size, leave the global_work_size vector empty, \ 12955 and set multi_dimensional to true. Setting the global work size based on the values inside \ 12956 the global_work_size vector.");
12960 work_dimension = 1;
12962 else if (temp_sz == 2){
12965 work_dimension = 2;
12972 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
12979 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
12981 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a temporary host array, then copy into *v.
// NOTE(review): no free(result) appears in the visible lines - confirm it is released.
12983 int *result = (
int *) malloc(typesz);
12984 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
12986 v->assign(result, result+sz);
// Read buffer2 back into *v2. When its size differs from the first buffer a separate
// result2 array is allocated (its declaration is on a line missing from this listing).
12988 if (typesz2 != typesz or sz != sz2){
12990 result2 = (
char *) malloc(typesz2);
12991 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
12993 v2->assign(result2, result2+sz2);
// Presumably the else branch (its line is not visible): reuse the int array `result`.
12997 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
12999 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
13002 clReleaseCommandQueue (queue);
13003 clReleaseMemObject(buffer);
13004 clReleaseMemObject(buffer2);
13005 clReleaseMemObject(buffer3);
13006 clReleaseEvent(gpuExec);
// Fragment of a three-buffer OpenCL kernel launch (int / char / double host vectors)
// that reads all three buffers back into *v, *v2 and *v3.
// NOTE(review): the enclosing function signature, braces and several branch lines are
// absent from this listing (see the gaps in the embedded line numbers), so only the
// statements shown here are described.
// Element counts and byte sizes of the three host vectors.
13011 size_t sz = v->size();
13012 size_t sz2 = v2->size();
13013 size_t sz3 = v3->size();
13014 size_t typesz =
sizeof(int) * sz;
13015 size_t typesz2 =
sizeof(char) * sz2;
13016 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
13017 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the buffers_size entry count; their guarding conditions are not visible here.
13021 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13028 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One read/write device buffer per vector.
// NOTE(review): every call overwrites `error` and no check is visible in between.
13032 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13034 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
13036 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2.
13038 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
13039 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
13040 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
13041 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
// NOTE(review): these return codes are not passed to checkError, unlike the reads below.
13043 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
13045 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
13047 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Global work size candidates; work_dimension selects how many entries the launch uses.
13050 size_t size[3] = {sz, sz2, sz3};
13051 size_t work_dimension = 3;
// 1-D launch when params is NULL, or when multi_dimensional is false and temp_sz is 0.
13054 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
13055 work_dimension = 1;
// NOTE(review): the remaining branches presumably pick the dimension from an explicit
// global_work_size; the lines that assign size[] are missing from this listing.
13057 else if(temp_sz > 0){
13059 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 13060 For default multidimensional global work size, leave the global_work_size vector empty, \ 13061 and set multi_dimensional to true. Setting the global work size based on the values inside \ 13062 the global_work_size vector.");
13066 work_dimension = 1;
13068 else if (temp_sz == 2){
13071 work_dimension = 2;
13078 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
13085 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13087 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a temporary host array, then copy into *v.
// NOTE(review): no free(result) appears in the visible lines - confirm it is released.
13089 int *result = (
int *) malloc(typesz);
13090 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13092 v->assign(result, result+sz);
// Read buffer2 back into *v2. When its size differs from the first buffer a separate
// result2 array is allocated (its declaration is on a line missing from this listing).
13094 if (typesz2 != typesz or sz != sz2){
13096 result2 = (
char *) malloc(typesz2);
13097 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
13099 v2->assign(result2, result2+sz2);
// Presumably the else branch (its line is not visible): reuse the int array `result`.
13103 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13105 v2->assign(result, result+sz2);
// Read buffer3 back into *v3, using a separate result3 array when sizes differ.
13108 if (typesz3 != typesz or sz != sz3){
13110 result3 = (
double *) malloc(typesz3);
13111 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
13113 v3->assign(result3, result3+sz3);
// NOTE(review): this read targets buffer2 with typesz2, yet the data is assigned to *v3 -
// looks like a copy/paste slip for buffer3/typesz3; confirm against the kernel's contract.
13117 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13119 v3->assign(result, result+sz3);
// Release the OpenCL objects created above.
13122 clReleaseCommandQueue (queue);
13123 clReleaseMemObject(buffer);
13124 clReleaseMemObject(buffer2);
13125 clReleaseMemObject(buffer3);
13126 clReleaseEvent(gpuExec);
// Fragment of a three-buffer OpenCL kernel launch (int / int / char host vectors)
// that copies the first buffer's contents into a local vector `res`.
// NOTE(review): the enclosing function signature, braces and several branch lines are
// absent from this listing (see the gaps in the embedded line numbers), so only the
// statements shown here are described.
// Element counts and byte sizes of the three host vectors.
13132 size_t sz = v.size();
13133 size_t sz2 = v2.size();
13134 size_t sz3 = v3.size();
13135 size_t typesz =
sizeof(int) * sz;
13136 size_t typesz2 =
sizeof(int) * sz2;
13137 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
13138 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the buffers_size entry count; their guarding conditions are not visible here.
13142 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13149 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector.
// NOTE(review): every call overwrites `error` and no check is visible in between.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY although host data is uploaded
// into them below - confirm the kernel never reads them.
13153 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13155 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
13157 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2.
13159 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
13160 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
13161 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
13162 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
// NOTE(review): these return codes are not passed to checkError, unlike the read below.
13164 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
13166 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
13168 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size candidates; work_dimension selects how many entries the launch uses.
13171 size_t size[3] = {sz, sz2, sz3};
13172 size_t work_dimension = 3;
// 1-D launch when params is NULL, or when multi_dimensional is false and temp_sz is 0.
13175 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
13176 work_dimension = 1;
// NOTE(review): the remaining branches presumably pick the dimension from an explicit
// global_work_size; the lines that assign size[] are missing from this listing.
13178 else if(temp_sz > 0){
13180 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 13181 For default multidimensional global work size, leave the global_work_size vector empty, \ 13182 and set multi_dimensional to true. Setting the global work size based on the values inside \ 13183 the global_work_size vector.");
13187 work_dimension = 1;
13189 else if (temp_sz == 2){
13192 work_dimension = 2;
13199 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
13206 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13208 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a temporary host array.
// NOTE(review): no free(result) appears in the visible lines - confirm it is released.
13210 int *result = (
int *) malloc(typesz);
13211 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the sz ints into a local vector; how res is used afterwards is not visible here.
13213 std::vector<int> res = std::vector<int>();
13214 res.assign(result, result+sz);
// Release the OpenCL objects created above.
13216 clReleaseCommandQueue (queue);
13217 clReleaseMemObject(buffer);
13218 clReleaseMemObject(buffer2);
13219 clReleaseMemObject(buffer3);
13220 clReleaseEvent(gpuExec);
// Fragment of a three-buffer OpenCL kernel launch (int / int / char host vectors)
// that writes the first buffer's contents back into *v.
// NOTE(review): the enclosing function signature, braces and several branch lines are
// absent from this listing (see the gaps in the embedded line numbers), so only the
// statements shown here are described.
// Element counts and byte sizes of the three host vectors.
13227 size_t sz = v->size();
13228 size_t sz2 = v2.size();
13229 size_t sz3 = v3.size();
13230 size_t typesz =
sizeof(int) * sz;
13231 size_t typesz2 =
sizeof(int) * sz2;
13232 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
13233 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the buffers_size entry count; their guarding conditions are not visible here.
13237 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13244 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector.
// NOTE(review): every call overwrites `error` and no check is visible in between.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY although host data is uploaded
// into them below - confirm the kernel never reads them.
13248 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13250 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
13252 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2.
13254 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
13255 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
13256 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
13257 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
// NOTE(review): these return codes are not passed to checkError, unlike the read below.
13259 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
13261 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
13263 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size candidates; work_dimension selects how many entries the launch uses.
13266 size_t size[3] = {sz, sz2, sz3};
13267 size_t work_dimension = 3;
// 1-D launch when params is NULL, or when multi_dimensional is false and temp_sz is 0.
13270 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
13271 work_dimension = 1;
// NOTE(review): the remaining branches presumably pick the dimension from an explicit
// global_work_size; the lines that assign size[] are missing from this listing.
13273 else if(temp_sz > 0){
13275 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 13276 For default multidimensional global work size, leave the global_work_size vector empty, \ 13277 and set multi_dimensional to true. Setting the global work size based on the values inside \ 13278 the global_work_size vector.");
13282 work_dimension = 1;
13284 else if (temp_sz == 2){
13287 work_dimension = 2;
13294 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
13301 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13303 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and waited on a second time; gpuExec is overwritten,
// so the first event is not covered by the single clReleaseEvent below - confirm intent.
13305 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13307 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a temporary host array, then copy into *v.
// NOTE(review): no free(result) appears in the visible lines - confirm it is released.
13309 int *result = (
int *) malloc(typesz);
13310 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13312 v->assign(result, result+sz);
// Release the OpenCL objects created above.
13314 clReleaseCommandQueue (queue);
13315 clReleaseMemObject(buffer);
13316 clReleaseMemObject(buffer2);
13317 clReleaseMemObject(buffer3);
13318 clReleaseEvent(gpuExec);
// Fragment of a three-buffer OpenCL kernel launch (int / int / char host vectors)
// that reads the first two buffers back into *v and *v2.
// NOTE(review): the enclosing function signature, braces and several branch lines are
// absent from this listing (see the gaps in the embedded line numbers), so only the
// statements shown here are described.
// Element counts and byte sizes of the three host vectors.
13323 size_t sz = v->size();
13324 size_t sz2 = v2->size();
13325 size_t sz3 = v3.size();
13326 size_t typesz =
sizeof(int) * sz;
13327 size_t typesz2 =
sizeof(int) * sz2;
13328 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
13329 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the buffers_size entry count; their guarding conditions are not visible here.
13333 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13340 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector.
// NOTE(review): every call overwrites `error` and no check is visible in between.
// NOTE(review): buffer3 is CL_MEM_WRITE_ONLY although host data is uploaded into it
// below - confirm the kernel never reads it.
13344 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13346 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
13348 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2.
13350 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
13351 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
13352 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
13353 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
// NOTE(review): these return codes are not passed to checkError, unlike the reads below.
13355 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
13357 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
13359 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size candidates; work_dimension selects how many entries the launch uses.
13362 size_t size[3] = {sz, sz2, sz3};
13363 size_t work_dimension = 3;
// 1-D launch when params is NULL, or when multi_dimensional is false and temp_sz is 0.
13366 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
13367 work_dimension = 1;
// NOTE(review): the remaining branches presumably pick the dimension from an explicit
// global_work_size; the lines that assign size[] are missing from this listing.
13369 else if(temp_sz > 0){
13371 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 13372 For default multidimensional global work size, leave the global_work_size vector empty, \ 13373 and set multi_dimensional to true. Setting the global work size based on the values inside \ 13374 the global_work_size vector.");
13378 work_dimension = 1;
13380 else if (temp_sz == 2){
13383 work_dimension = 2;
13390 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
13397 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13399 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a temporary host array, then copy into *v.
// NOTE(review): no free(result) appears in the visible lines - confirm it is released.
13401 int *result = (
int *) malloc(typesz);
13402 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13404 v->assign(result, result+sz);
// Read buffer2 back into *v2. When its size differs from the first buffer a separate
// result2 array is allocated (its declaration is on a line missing from this listing).
13406 if (typesz2 != typesz or sz != sz2){
13408 result2 = (
int *) malloc(typesz2);
13409 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
13411 v2->assign(result2, result2+sz2);
// Presumably the else branch (its line is not visible): reuse the array `result`.
13415 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13417 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
13420 clReleaseCommandQueue (queue);
13421 clReleaseMemObject(buffer);
13422 clReleaseMemObject(buffer2);
13423 clReleaseMemObject(buffer3);
13424 clReleaseEvent(gpuExec);
// Fragment of a three-buffer OpenCL kernel launch (int / int / char host vectors)
// that reads all three buffers back into *v, *v2 and *v3.
// NOTE(review): the enclosing function signature, braces and several branch lines are
// absent from this listing (see the gaps in the embedded line numbers), so only the
// statements shown here are described.
// Element counts and byte sizes of the three host vectors.
13429 size_t sz = v->size();
13430 size_t sz2 = v2->size();
13431 size_t sz3 = v3->size();
13432 size_t typesz =
sizeof(int) * sz;
13433 size_t typesz2 =
sizeof(int) * sz2;
13434 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
13435 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the buffers_size entry count; their guarding conditions are not visible here.
13439 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13446 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One read/write device buffer per vector.
// NOTE(review): every call overwrites `error` and no check is visible in between.
13450 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13452 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
13454 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2.
13456 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
13457 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
13458 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
13459 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
// NOTE(review): these return codes are not passed to checkError, unlike the reads below.
13461 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
13463 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
13465 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Global work size candidates; work_dimension selects how many entries the launch uses.
13468 size_t size[3] = {sz, sz2, sz3};
13469 size_t work_dimension = 3;
// 1-D launch when params is NULL, or when multi_dimensional is false and temp_sz is 0.
13472 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
13473 work_dimension = 1;
// NOTE(review): the remaining branches presumably pick the dimension from an explicit
// global_work_size; the lines that assign size[] are missing from this listing.
13475 else if(temp_sz > 0){
13477 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 13478 For default multidimensional global work size, leave the global_work_size vector empty, \ 13479 and set multi_dimensional to true. Setting the global work size based on the values inside \ 13480 the global_work_size vector.");
13484 work_dimension = 1;
13486 else if (temp_sz == 2){
13489 work_dimension = 2;
13496 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
13503 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13505 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a temporary host array, then copy into *v.
// NOTE(review): no free(result) appears in the visible lines - confirm it is released.
13507 int *result = (
int *) malloc(typesz);
13508 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13510 v->assign(result, result+sz);
// Read buffer2 back into *v2. When its size differs from the first buffer a separate
// result2 array is allocated (its declaration is on a line missing from this listing).
13512 if (typesz2 != typesz or sz != sz2){
13514 result2 = (
int *) malloc(typesz2);
13515 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
13517 v2->assign(result2, result2+sz2);
// Presumably the else branch (its line is not visible): reuse the array `result`.
13521 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13523 v2->assign(result, result+sz2);
// Read buffer3 back into *v3, using a separate result3 array when sizes differ.
13526 if (typesz3 != typesz or sz != sz3){
13528 result3 = (
char *) malloc(typesz3);
13529 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
13531 v3->assign(result3, result3+sz3);
// NOTE(review): this read targets buffer2 with typesz2, yet the data is assigned to *v3 -
// looks like a copy/paste slip for buffer3/typesz3; confirm against the kernel's contract.
13535 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13537 v3->assign(result, result+sz3);
// Release the OpenCL objects created above.
13540 clReleaseCommandQueue (queue);
13541 clReleaseMemObject(buffer);
13542 clReleaseMemObject(buffer2);
13543 clReleaseMemObject(buffer3);
13544 clReleaseEvent(gpuExec);
// Fragment of a three-buffer OpenCL kernel launch (three int host vectors)
// that copies the first buffer's contents into a local vector `res`.
// NOTE(review): the enclosing function signature, braces and several branch lines are
// absent from this listing (see the gaps in the embedded line numbers), so only the
// statements shown here are described.
// Element counts and byte sizes of the three host vectors.
13550 size_t sz = v.size();
13551 size_t sz2 = v2.size();
13552 size_t sz3 = v3.size();
13553 size_t typesz =
sizeof(int) * sz;
13554 size_t typesz2 =
sizeof(int) * sz2;
13555 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
13556 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the buffers_size entry count; their guarding conditions are not visible here.
13560 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13567 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector.
// NOTE(review): every call overwrites `error` and no check is visible in between.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY although host data is uploaded
// into them below - confirm the kernel never reads them.
13571 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13573 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
13575 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2.
13577 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
13578 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
13579 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
13580 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
// NOTE(review): these return codes are not passed to checkError, unlike the read below.
13582 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
13584 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
13586 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size candidates; work_dimension selects how many entries the launch uses.
13589 size_t size[3] = {sz, sz2, sz3};
13590 size_t work_dimension = 3;
// 1-D launch when params is NULL, or when multi_dimensional is false and temp_sz is 0.
13593 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
13594 work_dimension = 1;
// NOTE(review): the remaining branches presumably pick the dimension from an explicit
// global_work_size; the lines that assign size[] are missing from this listing.
13596 else if(temp_sz > 0){
13598 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 13599 For default multidimensional global work size, leave the global_work_size vector empty, \ 13600 and set multi_dimensional to true. Setting the global work size based on the values inside \ 13601 the global_work_size vector.");
13605 work_dimension = 1;
13607 else if (temp_sz == 2){
13610 work_dimension = 2;
13617 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
13624 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13626 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a temporary host array.
// NOTE(review): no free(result) appears in the visible lines - confirm it is released.
13628 int *result = (
int *) malloc(typesz);
13629 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the sz ints into a local vector; how res is used afterwards is not visible here.
13631 std::vector<int> res = std::vector<int>();
13632 res.assign(result, result+sz);
// Release the OpenCL objects created above.
13634 clReleaseCommandQueue (queue);
13635 clReleaseMemObject(buffer);
13636 clReleaseMemObject(buffer2);
13637 clReleaseMemObject(buffer3);
13638 clReleaseEvent(gpuExec);
// Fragment of a three-buffer OpenCL kernel launch (three int host vectors)
// that writes the first buffer's contents back into *v.
// NOTE(review): the enclosing function signature, braces and several branch lines are
// absent from this listing (see the gaps in the embedded line numbers), so only the
// statements shown here are described.
// Element counts and byte sizes of the three host vectors.
13645 size_t sz = v->size();
13646 size_t sz2 = v2.size();
13647 size_t sz3 = v3.size();
13648 size_t typesz =
sizeof(int) * sz;
13649 size_t typesz2 =
sizeof(int) * sz2;
13650 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
13651 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the buffers_size entry count; their guarding conditions are not visible here.
13655 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13662 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector.
// NOTE(review): every call overwrites `error` and no check is visible in between.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY although host data is uploaded
// into them below - confirm the kernel never reads them.
13666 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13668 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
13670 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2.
13672 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
13673 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
13674 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
13675 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
// NOTE(review): these return codes are not passed to checkError, unlike the read below.
13677 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
13679 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
13681 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size candidates; work_dimension selects how many entries the launch uses.
13684 size_t size[3] = {sz, sz2, sz3};
13685 size_t work_dimension = 3;
// 1-D launch when params is NULL, or when multi_dimensional is false and temp_sz is 0.
13688 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
13689 work_dimension = 1;
// NOTE(review): the remaining branches presumably pick the dimension from an explicit
// global_work_size; the lines that assign size[] are missing from this listing.
13691 else if(temp_sz > 0){
13693 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 13694 For default multidimensional global work size, leave the global_work_size vector empty, \ 13695 and set multi_dimensional to true. Setting the global work size based on the values inside \ 13696 the global_work_size vector.");
13700 work_dimension = 1;
13702 else if (temp_sz == 2){
13705 work_dimension = 2;
13712 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
13719 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13721 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and waited on a second time; gpuExec is overwritten,
// so the first event is not covered by the single clReleaseEvent below - confirm intent.
13723 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13725 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a temporary host array, then copy into *v.
// NOTE(review): no free(result) appears in the visible lines - confirm it is released.
13727 int *result = (
int *) malloc(typesz);
13728 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13730 v->assign(result, result+sz);
// Release the OpenCL objects created above.
13732 clReleaseCommandQueue (queue);
13733 clReleaseMemObject(buffer);
13734 clReleaseMemObject(buffer2);
13735 clReleaseMemObject(buffer3);
13736 clReleaseEvent(gpuExec);
// Fragment of a three-buffer OpenCL kernel launch (three int host vectors)
// that reads the first two buffers back into *v and *v2.
// NOTE(review): the enclosing function signature, braces and several branch lines are
// absent from this listing (see the gaps in the embedded line numbers), so only the
// statements shown here are described.
// Element counts and byte sizes of the three host vectors.
13741 size_t sz = v->size();
13742 size_t sz2 = v2->size();
13743 size_t sz3 = v3.size();
13744 size_t typesz =
sizeof(int) * sz;
13745 size_t typesz2 =
sizeof(int) * sz2;
13746 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
13747 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the buffers_size entry count; their guarding conditions are not visible here.
13751 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13758 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector.
// NOTE(review): every call overwrites `error` and no check is visible in between.
// NOTE(review): buffer3 is CL_MEM_WRITE_ONLY although host data is uploaded into it
// below - confirm the kernel never reads it.
13762 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13764 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
13766 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2.
13768 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
13769 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
13770 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
13771 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
// NOTE(review): these return codes are not passed to checkError, unlike the reads below.
13773 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
13775 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
13777 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size candidates; work_dimension selects how many entries the launch uses.
13780 size_t size[3] = {sz, sz2, sz3};
13781 size_t work_dimension = 3;
// 1-D launch when params is NULL, or when multi_dimensional is false and temp_sz is 0.
13784 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
13785 work_dimension = 1;
// NOTE(review): the remaining branches presumably pick the dimension from an explicit
// global_work_size; the lines that assign size[] are missing from this listing.
13787 else if(temp_sz > 0){
13789 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 13790 For default multidimensional global work size, leave the global_work_size vector empty, \ 13791 and set multi_dimensional to true. Setting the global work size based on the values inside \ 13792 the global_work_size vector.");
13796 work_dimension = 1;
13798 else if (temp_sz == 2){
13801 work_dimension = 2;
13808 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
13815 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13817 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a temporary host array, then copy into *v.
// NOTE(review): no free(result) appears in the visible lines - confirm it is released.
13819 int *result = (
int *) malloc(typesz);
13820 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13822 v->assign(result, result+sz);
// Read buffer2 back into *v2. When its size differs from the first buffer a separate
// result2 array is allocated (its declaration is on a line missing from this listing).
13824 if (typesz2 != typesz or sz != sz2){
13826 result2 = (
int *) malloc(typesz2);
13827 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
13829 v2->assign(result2, result2+sz2);
// Presumably the else branch (its line is not visible): reuse the array `result`.
13833 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13835 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
13838 clReleaseCommandQueue (queue);
13839 clReleaseMemObject(buffer);
13840 clReleaseMemObject(buffer2);
13841 clReleaseMemObject(buffer3);
13842 clReleaseEvent(gpuExec);
// Fragment of a three-buffer OpenCL kernel launch (three int host vectors)
// that reads all three buffers back into *v, *v2 and *v3.
// NOTE(review): the enclosing function signature, braces and several branch lines are
// absent from this listing (see the gaps in the embedded line numbers), so only the
// statements shown here are described.
// Element counts and byte sizes of the three host vectors.
13847 size_t sz = v->size();
13848 size_t sz2 = v2->size();
13849 size_t sz3 = v3->size();
13850 size_t typesz =
sizeof(int) * sz;
13851 size_t typesz2 =
sizeof(int) * sz2;
13852 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
13853 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the buffers_size entry count; their guarding conditions are not visible here.
13857 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13864 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One read/write device buffer per vector.
// NOTE(review): every call overwrites `error` and no check is visible in between.
13868 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13870 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
13872 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2.
13874 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
13875 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
13876 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL properties.
13877 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
// NOTE(review): these return codes are not passed to checkError, unlike the reads below.
13879 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
13881 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
13883 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Global work size candidates; work_dimension selects how many entries the launch uses.
13886 size_t size[3] = {sz, sz2, sz3};
13887 size_t work_dimension = 3;
// 1-D launch when params is NULL, or when multi_dimensional is false and temp_sz is 0.
13890 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
13891 work_dimension = 1;
// NOTE(review): the remaining branches presumably pick the dimension from an explicit
// global_work_size; the lines that assign size[] are missing from this listing.
13893 else if(temp_sz > 0){
13895 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 13896 For default multidimensional global work size, leave the global_work_size vector empty, \ 13897 and set multi_dimensional to true. Setting the global work size based on the values inside \ 13898 the global_work_size vector.");
13902 work_dimension = 1;
13904 else if (temp_sz == 2){
13907 work_dimension = 2;
13914 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
13921 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
13923 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a temporary host array, then copy into *v.
// NOTE(review): no free(result) appears in the visible lines - confirm it is released.
13925 int *result = (
int *) malloc(typesz);
13926 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
13928 v->assign(result, result+sz);
// Read buffer2 back into *v2. When its size differs from the first buffer a separate
// result2 array is allocated (its declaration is on a line missing from this listing).
13930 if (typesz2 != typesz or sz != sz2){
13932 result2 = (
int *) malloc(typesz2);
13933 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
13935 v2->assign(result2, result2+sz2);
// Presumably the else branch (its line is not visible): reuse the array `result`.
13939 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13941 v2->assign(result, result+sz2);
// Read buffer3 back into *v3, using a separate result3 array when sizes differ.
13944 if (typesz3 != typesz or sz != sz3){
13946 result3 = (
int *) malloc(typesz3);
13947 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
13949 v3->assign(result3, result3+sz3);
// NOTE(review): this read targets buffer2 with typesz2, yet the data is assigned to *v3 -
// looks like a copy/paste slip for buffer3/typesz3; confirm against the kernel's contract.
13953 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
13955 v3->assign(result, result+sz3);
// Release the OpenCL objects created above.
13958 clReleaseCommandQueue (queue);
13959 clReleaseMemObject(buffer);
13960 clReleaseMemObject(buffer2);
13961 clReleaseMemObject(buffer3);
13962 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL dispatch routine. The enclosing signature and several
// intervening lines (braces, else branches, some declarations) are not visible
// here, so the notes below describe only the statements shown.
// Element counts of the three host containers (used here as objects/references).
13968 size_t sz = v.size();
13969 size_t sz2 = v2.size();
13970 size_t sz3 = v3.size();
// Byte sizes of the device buffers: int-sized, int-sized and float-sized elements.
13971 size_t typesz =
sizeof(int) * sz;
13972 size_t typesz2 =
sizeof(int) * sz2;
13973 size_t typesz3 =
sizeof(float) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
13974 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short; the conditions
// guarding them are not visible in this excerpt.
13978 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
13985 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers: the first is read/write, the other two are created write-only.
// NOTE(review): buffer2/buffer3 are filled from the host below and never read
// back in the visible lines, which suggests they are kernel inputs. OpenCL
// leaves kernel reads from a CL_MEM_WRITE_ONLY buffer undefined -- confirm
// CL_MEM_READ_ONLY was not intended.
13989 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
13991 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
13993 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are discarded).
13995 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
13996 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
13997 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// New command queue on deviceIds[0] with default properties (NULL).
13998 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads (CL_TRUE) of the host data; return codes are discarded.
// NOTE(review): `&x[0]` on an empty vector is undefined behaviour -- confirm
// callers never pass empty containers.
14000 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
14002 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
14004 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes taken from the element counts; the launch
// starts at three dimensions and the branches below lower it.
14007 size_t size[3] = {sz, sz2, sz3};
14008 size_t work_dimension = 3;
// No params, or params that neither set multi_dimensional nor make temp_sz
// positive: use a 1-D launch. (The `params != NULL` test is redundant after
// the `params == NULL or` short-circuit.)
14011 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
14012 work_dimension = 1;
// NOTE(review): temp_sz was initialised from buffers_size above while the
// messages below talk about global_work_size -- presumably it is reassigned in
// a line not shown; verify. The nesting of the following branches cannot be
// confirmed from this excerpt.
14014 else if(temp_sz > 0){
14016 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 14017 For default multidimensional global work size, leave the global_work_size vector empty, \ 14018 and set multi_dimensional to true. Setting the global work size based on the values inside \ 14019 the global_work_size vector.");
14023 work_dimension = 1;
14025 else if (temp_sz == 2){
14028 work_dimension = 2;
14035 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no global offset and no explicit local
// size (both NULL), then block until the launch event completes.
14042 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14044 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a malloc'd staging array, copied into `res`.
// NOTE(review): the malloc result is not null-checked and no free(result) is
// visible in this excerpt -- confirm it is released.
14046 int *result = (
int *) malloc(typesz);
14047 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14049 std::vector<int> res = std::vector<int>();
14050 res.assign(result, result+sz);
// Release the OpenCL objects created above.
14052 clReleaseCommandQueue (queue);
14053 clReleaseMemObject(buffer);
14054 clReleaseMemObject(buffer2);
14055 clReleaseMemObject(buffer3);
14056 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL dispatch routine. The enclosing signature and several
// intervening lines (braces, else branches, some declarations) are not visible
// here, so the notes below describe only the statements shown.
// Element counts: `v` is dereferenced as a pointer, v2/v3 are used as objects.
14063 size_t sz = v->size();
14064 size_t sz2 = v2.size();
14065 size_t sz3 = v3.size();
// Byte sizes of the device buffers: int-sized, int-sized and float-sized elements.
14066 size_t typesz =
sizeof(int) * sz;
14067 size_t typesz2 =
sizeof(int) * sz2;
14068 size_t typesz3 =
sizeof(float) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
14069 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short; the conditions
// guarding them are not visible in this excerpt.
14073 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14080 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers: the first is read/write, the other two are created write-only.
// NOTE(review): buffer2/buffer3 are filled from the host below and never read
// back in the visible lines, which suggests they are kernel inputs. OpenCL
// leaves kernel reads from a CL_MEM_WRITE_ONLY buffer undefined -- confirm
// CL_MEM_READ_ONLY was not intended.
14084 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14086 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
14088 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are discarded).
14090 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
14091 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
14092 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// New command queue on deviceIds[0] with default properties (NULL).
14093 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads (CL_TRUE) of the host data; return codes are discarded.
// `v->at(0)` throws std::out_of_range for an empty vector, whereas `&x[0]` on
// an empty vector is undefined behaviour -- confirm callers never pass empties.
14095 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
14097 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
14099 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes taken from the element counts; the launch
// starts at three dimensions and the branches below lower it.
14102 size_t size[3] = {sz, sz2, sz3};
14103 size_t work_dimension = 3;
// No params, or params that neither set multi_dimensional nor make temp_sz
// positive: use a 1-D launch. (The `params != NULL` test is redundant after
// the `params == NULL or` short-circuit.)
14106 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
14107 work_dimension = 1;
// NOTE(review): temp_sz was initialised from buffers_size above while the
// messages below talk about global_work_size -- presumably it is reassigned in
// a line not shown; verify. The nesting of the following branches cannot be
// confirmed from this excerpt.
14109 else if(temp_sz > 0){
14111 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 14112 For default multidimensional global work size, leave the global_work_size vector empty, \ 14113 and set multi_dimensional to true. Setting the global work size based on the values inside \ 14114 the global_work_size vector.");
14118 work_dimension = 1;
14120 else if (temp_sz == 2){
14123 work_dimension = 2;
14130 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no global offset and no explicit local
// size (both NULL), then block until the launch event completes.
14137 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14139 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and awaited a second time with identical
// arguments. This overwrites gpuExec, and only one clReleaseEvent is visible
// below, so the first event would leak and the kernel would run twice. Lines
// between these statements are not shown -- confirm whether this is intended.
14141 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14143 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a malloc'd staging array, copied back into *v.
// NOTE(review): the malloc result is not null-checked and no free(result) is
// visible in this excerpt -- confirm it is released.
14145 int *result = (
int *) malloc(typesz);
14146 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14148 v->assign(result, result+sz);
// Release the OpenCL objects created above.
14150 clReleaseCommandQueue (queue);
14151 clReleaseMemObject(buffer);
14152 clReleaseMemObject(buffer2);
14153 clReleaseMemObject(buffer3);
14154 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL dispatch routine. The enclosing signature and several
// intervening lines (braces, else branches, some declarations) are not visible
// here, so the notes below describe only the statements shown.
// Element counts: `v` and `v2` are dereferenced as pointers, v3 is an object.
14159 size_t sz = v->size();
14160 size_t sz2 = v2->size();
14161 size_t sz3 = v3.size();
// Byte sizes of the device buffers: int-sized, int-sized and float-sized elements.
14162 size_t typesz =
sizeof(int) * sz;
14163 size_t typesz2 =
sizeof(int) * sz2;
14164 size_t typesz3 =
sizeof(float) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
14165 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short; the conditions
// guarding them are not visible in this excerpt.
14169 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14176 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers: the first two are read/write, the third is created write-only.
// NOTE(review): buffer3 is filled from the host below and never read back in
// the visible lines, which suggests it is a kernel input. OpenCL leaves kernel
// reads from a CL_MEM_WRITE_ONLY buffer undefined -- confirm CL_MEM_READ_ONLY
// was not intended.
14180 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14182 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
14184 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are discarded).
14186 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
14187 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
14188 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// New command queue on deviceIds[0] with default properties (NULL).
14189 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads (CL_TRUE) of the host data; return codes are discarded.
// `->at(0)` throws std::out_of_range for an empty vector, whereas `&v3[0]` on
// an empty vector is undefined behaviour -- confirm callers never pass empties.
14191 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
14193 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
14195 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes taken from the element counts; the launch
// starts at three dimensions and the branches below lower it.
14198 size_t size[3] = {sz, sz2, sz3};
14199 size_t work_dimension = 3;
// No params, or params that neither set multi_dimensional nor make temp_sz
// positive: use a 1-D launch. (The `params != NULL` test is redundant after
// the `params == NULL or` short-circuit.)
14202 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
14203 work_dimension = 1;
// NOTE(review): temp_sz was initialised from buffers_size above while the
// messages below talk about global_work_size -- presumably it is reassigned in
// a line not shown; verify. The nesting of the following branches cannot be
// confirmed from this excerpt.
14205 else if(temp_sz > 0){
14207 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 14208 For default multidimensional global work size, leave the global_work_size vector empty, \ 14209 and set multi_dimensional to true. Setting the global work size based on the values inside \ 14210 the global_work_size vector.");
14214 work_dimension = 1;
14216 else if (temp_sz == 2){
14219 work_dimension = 2;
14226 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no global offset and no explicit local
// size (both NULL), then block until the launch event completes.
14233 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14235 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a malloc'd staging array, copied back into *v.
// NOTE(review): the malloc results here and below are not null-checked and no
// free() is visible in this excerpt -- confirm the staging arrays are released.
14237 int *result = (
int *) malloc(typesz);
14238 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14240 v->assign(result, result+sz);
// Read-back of buffer2 into *v2. When the sizes differ from the first buffer a
// separate staging array is allocated (result2's declaration is not visible
// here); the last two statements reuse `result` instead, presumably as the
// else branch -- the `else` line itself is not shown.
14242 if (typesz2 != typesz or sz != sz2){
14244 result2 = (
int *) malloc(typesz2);
14245 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
14247 v2->assign(result2, result2+sz2);
14251 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
14253 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
14256 clReleaseCommandQueue (queue);
14257 clReleaseMemObject(buffer);
14258 clReleaseMemObject(buffer2);
14259 clReleaseMemObject(buffer3);
14260 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL dispatch routine. The enclosing signature and several
// intervening lines (braces, else branches, some declarations) are not visible
// here, so the notes below describe only the statements shown.
// Element counts: all three containers are dereferenced as pointers.
14265 size_t sz = v->size();
14266 size_t sz2 = v2->size();
14267 size_t sz3 = v3->size();
// Byte sizes of the device buffers: int-sized, int-sized and float-sized elements.
14268 size_t typesz =
sizeof(int) * sz;
14269 size_t typesz2 =
sizeof(int) * sz2;
14270 size_t typesz3 =
sizeof(float) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
14271 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short; the conditions
// guarding them are not visible in this excerpt.
14275 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14282 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// All three device buffers are read/write; each is read back further down.
14286 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14288 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
14290 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are discarded).
14292 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
14293 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
14294 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// New command queue on deviceIds[0] with default properties (NULL).
14295 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads (CL_TRUE) of the host data; return codes are discarded.
// `->at(0)` throws std::out_of_range when the vector is empty.
14297 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
14299 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
14301 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Per-dimension global work sizes taken from the element counts; the launch
// starts at three dimensions and the branches below lower it.
14304 size_t size[3] = {sz, sz2, sz3};
14305 size_t work_dimension = 3;
// No params, or params that neither set multi_dimensional nor make temp_sz
// positive: use a 1-D launch. (The `params != NULL` test is redundant after
// the `params == NULL or` short-circuit.)
14308 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
14309 work_dimension = 1;
// NOTE(review): temp_sz was initialised from buffers_size above while the
// messages below talk about global_work_size -- presumably it is reassigned in
// a line not shown; verify. The nesting of the following branches cannot be
// confirmed from this excerpt.
14311 else if(temp_sz > 0){
14313 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 14314 For default multidimensional global work size, leave the global_work_size vector empty, \ 14315 and set multi_dimensional to true. Setting the global work size based on the values inside \ 14316 the global_work_size vector.");
14320 work_dimension = 1;
14322 else if (temp_sz == 2){
14325 work_dimension = 2;
14332 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no global offset and no explicit local
// size (both NULL), then block until the launch event completes.
14339 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14341 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a malloc'd staging array, copied back into *v.
// NOTE(review): the malloc results here and below are not null-checked and no
// free() is visible in this excerpt -- confirm the staging arrays are released.
14343 int *result = (
int *) malloc(typesz);
14344 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14346 v->assign(result, result+sz);
// Read-back of buffer2 into *v2. When the sizes differ from the first buffer a
// separate staging array is allocated (result2's declaration is not visible
// here); the last two statements reuse `result` instead, presumably as the
// else branch -- the `else` line itself is not shown.
14348 if (typesz2 != typesz or sz != sz2){
14350 result2 = (
int *) malloc(typesz2);
14351 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
14353 v2->assign(result2, result2+sz2);
14357 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
14359 v2->assign(result, result+sz2);
// Read-back of buffer3 into *v3 through a float staging array when the sizes
// differ from the first buffer.
14362 if (typesz3 != typesz or sz != sz3){
14364 result3 = (
float *) malloc(typesz3);
14365 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
14367 v3->assign(result3, result3+sz3);
// NOTE(review): the two statements below (presumably the else branch) read
// buffer2 with typesz2, not buffer3 with typesz3, and then fill *v3 from the
// int* staging array. That looks like a copy-paste slip: *v3 would receive
// buffer2's data converted from int, and reading typesz2 bytes into `result`
// (allocated with typesz bytes) can overrun it when sz2 > sz. Confirm intent.
14371 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
14373 v3->assign(result, result+sz3);
// Release the OpenCL objects created above.
14376 clReleaseCommandQueue (queue);
14377 clReleaseMemObject(buffer);
14378 clReleaseMemObject(buffer2);
14379 clReleaseMemObject(buffer3);
14380 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL dispatch routine. The enclosing signature and several
// intervening lines (braces, else branches, some declarations) are not visible
// here, so the notes below describe only the statements shown.
// Element counts of the three host containers (used here as objects/references).
14386 size_t sz = v.size();
14387 size_t sz2 = v2.size();
14388 size_t sz3 = v3.size();
// Byte sizes of the device buffers: int-sized, int-sized and double-sized elements.
14389 size_t typesz =
sizeof(int) * sz;
14390 size_t typesz2 =
sizeof(int) * sz2;
14391 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
14392 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short; the conditions
// guarding them are not visible in this excerpt.
14396 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14403 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers: the first is read/write, the other two are created write-only.
// NOTE(review): buffer2/buffer3 are filled from the host below and never read
// back in the visible lines, which suggests they are kernel inputs. OpenCL
// leaves kernel reads from a CL_MEM_WRITE_ONLY buffer undefined -- confirm
// CL_MEM_READ_ONLY was not intended.
14407 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14409 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
14411 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are discarded).
14413 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
14414 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
14415 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// New command queue on deviceIds[0] with default properties (NULL).
14416 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads (CL_TRUE) of the host data; return codes are discarded.
// NOTE(review): `&x[0]` on an empty vector is undefined behaviour -- confirm
// callers never pass empty containers.
14418 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
14420 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
14422 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes taken from the element counts; the launch
// starts at three dimensions and the branches below lower it.
14425 size_t size[3] = {sz, sz2, sz3};
14426 size_t work_dimension = 3;
// No params, or params that neither set multi_dimensional nor make temp_sz
// positive: use a 1-D launch. (The `params != NULL` test is redundant after
// the `params == NULL or` short-circuit.)
14429 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
14430 work_dimension = 1;
// NOTE(review): temp_sz was initialised from buffers_size above while the
// messages below talk about global_work_size -- presumably it is reassigned in
// a line not shown; verify. The nesting of the following branches cannot be
// confirmed from this excerpt.
14432 else if(temp_sz > 0){
14434 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 14435 For default multidimensional global work size, leave the global_work_size vector empty, \ 14436 and set multi_dimensional to true. Setting the global work size based on the values inside \ 14437 the global_work_size vector.");
14441 work_dimension = 1;
14443 else if (temp_sz == 2){
14446 work_dimension = 2;
14453 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no global offset and no explicit local
// size (both NULL), then block until the launch event completes.
14460 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14462 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a malloc'd staging array, copied into `res`.
// NOTE(review): the malloc result is not null-checked and no free(result) is
// visible in this excerpt -- confirm it is released.
14464 int *result = (
int *) malloc(typesz);
14465 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14467 std::vector<int> res = std::vector<int>();
14468 res.assign(result, result+sz);
// Release the OpenCL objects created above.
14470 clReleaseCommandQueue (queue);
14471 clReleaseMemObject(buffer);
14472 clReleaseMemObject(buffer2);
14473 clReleaseMemObject(buffer3);
14474 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL dispatch routine. The enclosing signature and several
// intervening lines (braces, else branches, some declarations) are not visible
// here, so the notes below describe only the statements shown.
// Element counts: `v` is dereferenced as a pointer, v2/v3 are used as objects.
14481 size_t sz = v->size();
14482 size_t sz2 = v2.size();
14483 size_t sz3 = v3.size();
// Byte sizes of the device buffers: int-sized, int-sized and double-sized elements.
14484 size_t typesz =
sizeof(int) * sz;
14485 size_t typesz2 =
sizeof(int) * sz2;
14486 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
14487 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short; the conditions
// guarding them are not visible in this excerpt.
14491 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14498 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers: the first is read/write, the other two are created write-only.
// NOTE(review): buffer2/buffer3 are filled from the host below and never read
// back in the visible lines, which suggests they are kernel inputs. OpenCL
// leaves kernel reads from a CL_MEM_WRITE_ONLY buffer undefined -- confirm
// CL_MEM_READ_ONLY was not intended.
14502 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14504 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
14506 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are discarded).
14508 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
14509 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
14510 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// New command queue on deviceIds[0] with default properties (NULL).
14511 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads (CL_TRUE) of the host data; return codes are discarded.
// `v->at(0)` throws std::out_of_range for an empty vector, whereas `&x[0]` on
// an empty vector is undefined behaviour -- confirm callers never pass empties.
14513 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
14515 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
14517 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes taken from the element counts; the launch
// starts at three dimensions and the branches below lower it.
14520 size_t size[3] = {sz, sz2, sz3};
14521 size_t work_dimension = 3;
// No params, or params that neither set multi_dimensional nor make temp_sz
// positive: use a 1-D launch. (The `params != NULL` test is redundant after
// the `params == NULL or` short-circuit.)
14524 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
14525 work_dimension = 1;
// NOTE(review): temp_sz was initialised from buffers_size above while the
// messages below talk about global_work_size -- presumably it is reassigned in
// a line not shown; verify. The nesting of the following branches cannot be
// confirmed from this excerpt.
14527 else if(temp_sz > 0){
14529 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 14530 For default multidimensional global work size, leave the global_work_size vector empty, \ 14531 and set multi_dimensional to true. Setting the global work size based on the values inside \ 14532 the global_work_size vector.");
14536 work_dimension = 1;
14538 else if (temp_sz == 2){
14541 work_dimension = 2;
14548 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no global offset and no explicit local
// size (both NULL), then block until the launch event completes.
14555 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14557 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and awaited a second time with identical
// arguments. This overwrites gpuExec, and only one clReleaseEvent is visible
// below, so the first event would leak and the kernel would run twice. Lines
// between these statements are not shown -- confirm whether this is intended.
14559 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14561 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a malloc'd staging array, copied back into *v.
// NOTE(review): the malloc result is not null-checked and no free(result) is
// visible in this excerpt -- confirm it is released.
14563 int *result = (
int *) malloc(typesz);
14564 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14566 v->assign(result, result+sz);
// Release the OpenCL objects created above.
14568 clReleaseCommandQueue (queue);
14569 clReleaseMemObject(buffer);
14570 clReleaseMemObject(buffer2);
14571 clReleaseMemObject(buffer3);
14572 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL dispatch routine. The enclosing signature and several
// intervening lines (braces, else branches, some declarations) are not visible
// here, so the notes below describe only the statements shown.
// Element counts: `v` and `v2` are dereferenced as pointers, v3 is an object.
14577 size_t sz = v->size();
14578 size_t sz2 = v2->size();
14579 size_t sz3 = v3.size();
// Byte sizes of the device buffers: int-sized, int-sized and double-sized elements.
14580 size_t typesz =
sizeof(int) * sz;
14581 size_t typesz2 =
sizeof(int) * sz2;
14582 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
14583 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short; the conditions
// guarding them are not visible in this excerpt.
14587 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14594 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers: the first two are read/write, the third is created write-only.
// NOTE(review): buffer3 is filled from the host below and never read back in
// the visible lines, which suggests it is a kernel input. OpenCL leaves kernel
// reads from a CL_MEM_WRITE_ONLY buffer undefined -- confirm CL_MEM_READ_ONLY
// was not intended.
14598 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14600 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
14602 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are discarded).
14604 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
14605 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
14606 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// New command queue on deviceIds[0] with default properties (NULL).
14607 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads (CL_TRUE) of the host data; return codes are discarded.
// `->at(0)` throws std::out_of_range for an empty vector, whereas `&v3[0]` on
// an empty vector is undefined behaviour -- confirm callers never pass empties.
14609 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
14611 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
14613 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes taken from the element counts; the launch
// starts at three dimensions and the branches below lower it.
14616 size_t size[3] = {sz, sz2, sz3};
14617 size_t work_dimension = 3;
// No params, or params that neither set multi_dimensional nor make temp_sz
// positive: use a 1-D launch. (The `params != NULL` test is redundant after
// the `params == NULL or` short-circuit.)
14620 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
14621 work_dimension = 1;
// NOTE(review): temp_sz was initialised from buffers_size above while the
// messages below talk about global_work_size -- presumably it is reassigned in
// a line not shown; verify. The nesting of the following branches cannot be
// confirmed from this excerpt.
14623 else if(temp_sz > 0){
14625 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 14626 For default multidimensional global work size, leave the global_work_size vector empty, \ 14627 and set multi_dimensional to true. Setting the global work size based on the values inside \ 14628 the global_work_size vector.");
14632 work_dimension = 1;
14634 else if (temp_sz == 2){
14637 work_dimension = 2;
14644 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no global offset and no explicit local
// size (both NULL), then block until the launch event completes.
14651 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14653 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a malloc'd staging array, copied back into *v.
// NOTE(review): the malloc results here and below are not null-checked and no
// free() is visible in this excerpt -- confirm the staging arrays are released.
14655 int *result = (
int *) malloc(typesz);
14656 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14658 v->assign(result, result+sz);
// Read-back of buffer2 into *v2. When the sizes differ from the first buffer a
// separate staging array is allocated (result2's declaration is not visible
// here); the last two statements reuse `result` instead, presumably as the
// else branch -- the `else` line itself is not shown.
14660 if (typesz2 != typesz or sz != sz2){
14662 result2 = (
int *) malloc(typesz2);
14663 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
14665 v2->assign(result2, result2+sz2);
14669 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
14671 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
14674 clReleaseCommandQueue (queue);
14675 clReleaseMemObject(buffer);
14676 clReleaseMemObject(buffer2);
14677 clReleaseMemObject(buffer3);
14678 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL dispatch routine. The enclosing signature and several
// intervening lines (braces, else branches, some declarations) are not visible
// here, so the notes below describe only the statements shown.
// Element counts: all three containers are dereferenced as pointers.
14683 size_t sz = v->size();
14684 size_t sz2 = v2->size();
14685 size_t sz3 = v3->size();
// Byte sizes of the device buffers: int-sized, int-sized and double-sized elements.
14686 size_t typesz =
sizeof(int) * sz;
14687 size_t typesz2 =
sizeof(int) * sz2;
14688 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
14689 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short; the conditions
// guarding them are not visible in this excerpt.
14693 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14700 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// All three device buffers are read/write; each is read back further down.
14704 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14706 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
14708 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are discarded).
14710 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
14711 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
14712 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// New command queue on deviceIds[0] with default properties (NULL).
14713 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads (CL_TRUE) of the host data; return codes are discarded.
// `->at(0)` throws std::out_of_range when the vector is empty.
14715 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
14717 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
14719 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Per-dimension global work sizes taken from the element counts; the launch
// starts at three dimensions and the branches below lower it.
14722 size_t size[3] = {sz, sz2, sz3};
14723 size_t work_dimension = 3;
// No params, or params that neither set multi_dimensional nor make temp_sz
// positive: use a 1-D launch. (The `params != NULL` test is redundant after
// the `params == NULL or` short-circuit.)
14726 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
14727 work_dimension = 1;
// NOTE(review): temp_sz was initialised from buffers_size above while the
// messages below talk about global_work_size -- presumably it is reassigned in
// a line not shown; verify. The nesting of the following branches cannot be
// confirmed from this excerpt.
14729 else if(temp_sz > 0){
14731 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 14732 For default multidimensional global work size, leave the global_work_size vector empty, \ 14733 and set multi_dimensional to true. Setting the global work size based on the values inside \ 14734 the global_work_size vector.");
14738 work_dimension = 1;
14740 else if (temp_sz == 2){
14743 work_dimension = 2;
14750 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no global offset and no explicit local
// size (both NULL), then block until the launch event completes.
14757 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14759 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a malloc'd staging array, copied back into *v.
// NOTE(review): the malloc results here and below are not null-checked and no
// free() is visible in this excerpt -- confirm the staging arrays are released.
14761 int *result = (
int *) malloc(typesz);
14762 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14764 v->assign(result, result+sz);
// Read-back of buffer2 into *v2. When the sizes differ from the first buffer a
// separate staging array is allocated (result2's declaration is not visible
// here); the last two statements reuse `result` instead, presumably as the
// else branch -- the `else` line itself is not shown.
14766 if (typesz2 != typesz or sz != sz2){
14768 result2 = (
int *) malloc(typesz2);
14769 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
14771 v2->assign(result2, result2+sz2);
14775 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
14777 v2->assign(result, result+sz2);
// Read-back of buffer3 into *v3 through a double staging array when the sizes
// differ from the first buffer.
14780 if (typesz3 != typesz or sz != sz3){
14782 result3 = (
double *) malloc(typesz3);
14783 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
14785 v3->assign(result3, result3+sz3);
// NOTE(review): the two statements below (presumably the else branch) read
// buffer2 with typesz2, not buffer3 with typesz3, and then fill *v3 from the
// int* staging array. That looks like a copy-paste slip: *v3 would receive
// buffer2's data converted from int, and reading typesz2 bytes into `result`
// (allocated with typesz bytes) can overrun it when sz2 > sz. Confirm intent.
14789 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
14791 v3->assign(result, result+sz3);
// Release the OpenCL objects created above.
14794 clReleaseCommandQueue (queue);
14795 clReleaseMemObject(buffer);
14796 clReleaseMemObject(buffer2);
14797 clReleaseMemObject(buffer3);
14798 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL dispatch routine. The enclosing signature and several
// intervening lines (braces, else branches, some declarations) are not visible
// here, so the notes below describe only the statements shown.
// Element counts of the three host containers (used here as objects/references).
14804 size_t sz = v.size();
14805 size_t sz2 = v2.size();
14806 size_t sz3 = v3.size();
// Byte sizes of the device buffers: int-sized, float-sized and char-sized elements.
14807 size_t typesz =
sizeof(int) * sz;
14808 size_t typesz2 =
sizeof(float) * sz2;
14809 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
14810 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short; the conditions
// guarding them are not visible in this excerpt.
14814 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14821 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers: the first is read/write, the other two are created write-only.
// NOTE(review): buffer2/buffer3 are filled from the host below and never read
// back in the visible lines, which suggests they are kernel inputs. OpenCL
// leaves kernel reads from a CL_MEM_WRITE_ONLY buffer undefined -- confirm
// CL_MEM_READ_ONLY was not intended.
14825 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14827 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
14829 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are discarded).
14831 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
14832 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
14833 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// New command queue on deviceIds[0] with default properties (NULL).
14834 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads (CL_TRUE) of the host data; return codes are discarded.
// NOTE(review): `&x[0]` on an empty vector is undefined behaviour -- confirm
// callers never pass empty containers.
14836 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
14838 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
14840 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes taken from the element counts; the launch
// starts at three dimensions and the branches below lower it.
14843 size_t size[3] = {sz, sz2, sz3};
14844 size_t work_dimension = 3;
// No params, or params that neither set multi_dimensional nor make temp_sz
// positive: use a 1-D launch. (The `params != NULL` test is redundant after
// the `params == NULL or` short-circuit.)
14847 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
14848 work_dimension = 1;
// NOTE(review): temp_sz was initialised from buffers_size above while the
// messages below talk about global_work_size -- presumably it is reassigned in
// a line not shown; verify. The nesting of the following branches cannot be
// confirmed from this excerpt.
14850 else if(temp_sz > 0){
14852 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 14853 For default multidimensional global work size, leave the global_work_size vector empty, \ 14854 and set multi_dimensional to true. Setting the global work size based on the values inside \ 14855 the global_work_size vector.");
14859 work_dimension = 1;
14861 else if (temp_sz == 2){
14864 work_dimension = 2;
14871 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no global offset and no explicit local
// size (both NULL), then block until the launch event completes.
14878 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14880 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a malloc'd staging array, copied into `res`.
// NOTE(review): the malloc result is not null-checked and no free(result) is
// visible in this excerpt -- confirm it is released.
14882 int *result = (
int *) malloc(typesz);
14883 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14885 std::vector<int> res = std::vector<int>();
14886 res.assign(result, result+sz);
// Release the OpenCL objects created above.
14888 clReleaseCommandQueue (queue);
14889 clReleaseMemObject(buffer);
14890 clReleaseMemObject(buffer2);
14891 clReleaseMemObject(buffer3);
14892 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL dispatch routine. The enclosing signature and several
// intervening lines (braces, else branches, some declarations) are not visible
// here, so the notes below describe only the statements shown.
// Element counts: `v` is dereferenced as a pointer, v2/v3 are used as objects.
14899 size_t sz = v->size();
14900 size_t sz2 = v2.size();
14901 size_t sz3 = v3.size();
// Byte sizes of the device buffers: int-sized, float-sized and char-sized elements.
14902 size_t typesz =
sizeof(int) * sz;
14903 size_t typesz2 =
sizeof(float) * sz2;
14904 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
14905 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short; the conditions
// guarding them are not visible in this excerpt.
14909 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
14916 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers: the first is read/write, the other two are created write-only.
// NOTE(review): buffer2/buffer3 are filled from the host below and never read
// back in the visible lines, which suggests they are kernel inputs. OpenCL
// leaves kernel reads from a CL_MEM_WRITE_ONLY buffer undefined -- confirm
// CL_MEM_READ_ONLY was not intended.
14920 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
14922 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
14924 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are discarded).
14926 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
14927 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
14928 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// New command queue on deviceIds[0] with default properties (NULL).
14929 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads (CL_TRUE) of the host data; return codes are discarded.
// `v->at(0)` throws std::out_of_range for an empty vector, whereas `&x[0]` on
// an empty vector is undefined behaviour -- confirm callers never pass empties.
14931 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
14933 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
14935 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes taken from the element counts; the launch
// starts at three dimensions and the branches below lower it.
14938 size_t size[3] = {sz, sz2, sz3};
14939 size_t work_dimension = 3;
// No params, or params that neither set multi_dimensional nor make temp_sz
// positive: use a 1-D launch. (The `params != NULL` test is redundant after
// the `params == NULL or` short-circuit.)
14942 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
14943 work_dimension = 1;
// NOTE(review): temp_sz was initialised from buffers_size above while the
// messages below talk about global_work_size -- presumably it is reassigned in
// a line not shown; verify. The nesting of the following branches cannot be
// confirmed from this excerpt.
14945 else if(temp_sz > 0){
14947 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 14948 For default multidimensional global work size, leave the global_work_size vector empty, \ 14949 and set multi_dimensional to true. Setting the global work size based on the values inside \ 14950 the global_work_size vector.");
14954 work_dimension = 1;
14956 else if (temp_sz == 2){
14959 work_dimension = 2;
14966 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no global offset and no explicit local
// size (both NULL), then block until the launch event completes.
14973 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14975 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and awaited a second time with identical
// arguments. This overwrites gpuExec, and only one clReleaseEvent is visible
// below, so the first event would leak and the kernel would run twice. Lines
// between these statements are not shown -- confirm whether this is intended.
14977 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
14979 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a malloc'd staging array, copied back into *v.
// NOTE(review): the malloc result is not null-checked and no free(result) is
// visible in this excerpt -- confirm it is released.
14981 int *result = (
int *) malloc(typesz);
14982 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
14984 v->assign(result, result+sz);
// Release the OpenCL objects created above.
14986 clReleaseCommandQueue (queue);
14987 clReleaseMemObject(buffer);
14988 clReleaseMemObject(buffer2);
14989 clReleaseMemObject(buffer3);
14990 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL dispatch routine. The enclosing signature and several
// intervening lines (braces, else branches, some declarations) are not visible
// here, so the notes below describe only the statements shown.
// Element counts: `v` and `v2` are dereferenced as pointers, v3 is an object.
14995 size_t sz = v->size();
14996 size_t sz2 = v2->size();
14997 size_t sz3 = v3.size();
// Byte sizes of the device buffers: int-sized, float-sized and char-sized elements.
14998 size_t typesz =
sizeof(int) * sz;
14999 size_t typesz2 =
sizeof(float) * sz2;
15000 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
15001 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long / too short; the conditions
// guarding them are not visible in this excerpt.
15005 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15012 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers: the first two are read/write, the third is created write-only.
// NOTE(review): buffer3 is filled from the host below and never read back in
// the visible lines, which suggests it is a kernel input. OpenCL leaves kernel
// reads from a CL_MEM_WRITE_ONLY buffer undefined -- confirm CL_MEM_READ_ONLY
// was not intended.
15016 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15018 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
15020 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are discarded).
15022 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
15023 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
15024 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// New command queue on deviceIds[0] with default properties (NULL).
15025 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads (CL_TRUE) of the host data; return codes are discarded.
// `->at(0)` throws std::out_of_range for an empty vector, whereas `&v3[0]` on
// an empty vector is undefined behaviour -- confirm callers never pass empties.
15027 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15029 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
15031 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes taken from the element counts; the launch
// starts at three dimensions and the branches below lower it.
15034 size_t size[3] = {sz, sz2, sz3};
15035 size_t work_dimension = 3;
// No params, or params that neither set multi_dimensional nor make temp_sz
// positive: use a 1-D launch. (The `params != NULL` test is redundant after
// the `params == NULL or` short-circuit.)
15038 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
15039 work_dimension = 1;
// NOTE(review): temp_sz was initialised from buffers_size above while the
// messages below talk about global_work_size -- presumably it is reassigned in
// a line not shown; verify. The nesting of the following branches cannot be
// confirmed from this excerpt.
15041 else if(temp_sz > 0){
15043 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 15044 For default multidimensional global work size, leave the global_work_size vector empty, \ 15045 and set multi_dimensional to true. Setting the global work size based on the values inside \ 15046 the global_work_size vector.");
15050 work_dimension = 1;
15052 else if (temp_sz == 2){
15055 work_dimension = 2;
15062 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel over `size` with no global offset and no explicit local
// size (both NULL), then block until the launch event completes.
15069 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15071 clWaitForEvents(1, &gpuExec);
// Blocking read of `buffer` into a malloc'd staging array, copied back into *v.
// NOTE(review): the malloc results here and below are not null-checked and no
// free() is visible in this excerpt -- confirm the staging arrays are released.
15073 int *result = (
int *) malloc(typesz);
15074 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15076 v->assign(result, result+sz);
// Read-back of buffer2 into *v2 through a float staging array when the sizes
// differ from the first buffer (result2's declaration is not visible here).
15078 if (typesz2 != typesz or sz != sz2){
15080 result2 = (
float *) malloc(typesz2);
15081 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
15083 v2->assign(result2, result2+sz2);
// NOTE(review): the two statements below (presumably the else branch, taken
// when sz == sz2 and the byte sizes match) read the float-sized buffer2 into
// the int* staging array and assign from it. If *v2 holds floats, each element
// would be a float bit pattern read as an int and then converted numerically,
// which corrupts the values -- confirm against the element type of *v2.
15087 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
15089 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
15092 clReleaseCommandQueue (queue);
15093 clReleaseMemObject(buffer);
15094 clReleaseMemObject(buffer2);
15095 clReleaseMemObject(buffer3);
15096 clReleaseEvent(gpuExec);
15101 size_t sz = v->size();
15102 size_t sz2 = v2->size();
15103 size_t sz3 = v3->size();
15104 size_t typesz =
sizeof(int) * sz;
15105 size_t typesz2 =
sizeof(float) * sz2;
15106 size_t typesz3 =
sizeof(char) * sz3;
15107 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
15111 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15118 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15122 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15124 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
15126 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
15128 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
15129 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
15130 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
15131 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
15133 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15135 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
15137 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
15140 size_t size[3] = {sz, sz2, sz3};
15141 size_t work_dimension = 3;
15144 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
15145 work_dimension = 1;
15147 else if(temp_sz > 0){
15149 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 15150 For default multidimensional global work size, leave the global_work_size vector empty, \ 15151 and set multi_dimensional to true. Setting the global work size based on the values inside \ 15152 the global_work_size vector.");
15156 work_dimension = 1;
15158 else if (temp_sz == 2){
15161 work_dimension = 2;
15168 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15175 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15177 clWaitForEvents(1, &gpuExec);
15179 int *result = (
int *) malloc(typesz);
15180 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15182 v->assign(result, result+sz);
15184 if (typesz2 != typesz or sz != sz2){
15186 result2 = (
float *) malloc(typesz2);
15187 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
15189 v2->assign(result2, result2+sz2);
15193 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
15195 v2->assign(result, result+sz2);
15198 if (typesz3 != typesz or sz != sz3){
15200 result3 = (
char *) malloc(typesz3);
15201 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
15203 v3->assign(result3, result3+sz3);
15207 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
15209 v3->assign(result, result+sz3);
15212 clReleaseCommandQueue (queue);
15213 clReleaseMemObject(buffer);
15214 clReleaseMemObject(buffer2);
15215 clReleaseMemObject(buffer3);
15216 clReleaseEvent(gpuExec);
15222 size_t sz = v.size();
15223 size_t sz2 = v2.size();
15224 size_t sz3 = v3.size();
15225 size_t typesz =
sizeof(int) * sz;
15226 size_t typesz2 =
sizeof(float) * sz2;
15227 size_t typesz3 =
sizeof(int) * sz3;
15228 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
15232 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15239 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15243 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15245 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
15247 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
15249 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
15250 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
15251 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
15252 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
15254 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
15256 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
15258 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
15261 size_t size[3] = {sz, sz2, sz3};
15262 size_t work_dimension = 3;
15265 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
15266 work_dimension = 1;
15268 else if(temp_sz > 0){
15270 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 15271 For default multidimensional global work size, leave the global_work_size vector empty, \ 15272 and set multi_dimensional to true. Setting the global work size based on the values inside \ 15273 the global_work_size vector.");
15277 work_dimension = 1;
15279 else if (temp_sz == 2){
15282 work_dimension = 2;
15289 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15296 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15298 clWaitForEvents(1, &gpuExec);
15300 int *result = (
int *) malloc(typesz);
15301 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15303 std::vector<int> res = std::vector<int>();
15304 res.assign(result, result+sz);
15306 clReleaseCommandQueue (queue);
15307 clReleaseMemObject(buffer);
15308 clReleaseMemObject(buffer2);
15309 clReleaseMemObject(buffer3);
15310 clReleaseEvent(gpuExec);
15317 size_t sz = v->size();
15318 size_t sz2 = v2.size();
15319 size_t sz3 = v3.size();
15320 size_t typesz =
sizeof(int) * sz;
15321 size_t typesz2 =
sizeof(float) * sz2;
15322 size_t typesz3 =
sizeof(int) * sz3;
15323 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
15327 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15334 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15338 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15340 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
15342 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
15344 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
15345 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
15346 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
15347 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
15349 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15351 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
15353 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
15356 size_t size[3] = {sz, sz2, sz3};
15357 size_t work_dimension = 3;
15360 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
15361 work_dimension = 1;
15363 else if(temp_sz > 0){
15365 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 15366 For default multidimensional global work size, leave the global_work_size vector empty, \ 15367 and set multi_dimensional to true. Setting the global work size based on the values inside \ 15368 the global_work_size vector.");
15372 work_dimension = 1;
15374 else if (temp_sz == 2){
15377 work_dimension = 2;
15384 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15391 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15393 clWaitForEvents(1, &gpuExec);
15395 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15397 clWaitForEvents(1, &gpuExec);
15399 int *result = (
int *) malloc(typesz);
15400 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15402 v->assign(result, result+sz);
15404 clReleaseCommandQueue (queue);
15405 clReleaseMemObject(buffer);
15406 clReleaseMemObject(buffer2);
15407 clReleaseMemObject(buffer3);
15408 clReleaseEvent(gpuExec);
15413 size_t sz = v->size();
15414 size_t sz2 = v2->size();
15415 size_t sz3 = v3.size();
15416 size_t typesz =
sizeof(int) * sz;
15417 size_t typesz2 =
sizeof(float) * sz2;
15418 size_t typesz3 =
sizeof(int) * sz3;
15419 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
15423 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15430 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15434 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15436 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
15438 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
15440 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
15441 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
15442 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
15443 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
15445 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15447 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
15449 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
15452 size_t size[3] = {sz, sz2, sz3};
15453 size_t work_dimension = 3;
15456 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
15457 work_dimension = 1;
15459 else if(temp_sz > 0){
15461 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 15462 For default multidimensional global work size, leave the global_work_size vector empty, \ 15463 and set multi_dimensional to true. Setting the global work size based on the values inside \ 15464 the global_work_size vector.");
15468 work_dimension = 1;
15470 else if (temp_sz == 2){
15473 work_dimension = 2;
15480 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15487 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15489 clWaitForEvents(1, &gpuExec);
15491 int *result = (
int *) malloc(typesz);
15492 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15494 v->assign(result, result+sz);
15496 if (typesz2 != typesz or sz != sz2){
15498 result2 = (
float *) malloc(typesz2);
15499 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
15501 v2->assign(result2, result2+sz2);
15505 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
15507 v2->assign(result, result+sz2);
15510 clReleaseCommandQueue (queue);
15511 clReleaseMemObject(buffer);
15512 clReleaseMemObject(buffer2);
15513 clReleaseMemObject(buffer3);
15514 clReleaseEvent(gpuExec);
15519 size_t sz = v->size();
15520 size_t sz2 = v2->size();
15521 size_t sz3 = v3->size();
15522 size_t typesz =
sizeof(int) * sz;
15523 size_t typesz2 =
sizeof(float) * sz2;
15524 size_t typesz3 =
sizeof(int) * sz3;
15525 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
15529 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15536 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15540 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15542 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
15544 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
15546 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
15547 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
15548 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
15549 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
15551 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15553 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
15555 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
15558 size_t size[3] = {sz, sz2, sz3};
15559 size_t work_dimension = 3;
15562 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
15563 work_dimension = 1;
15565 else if(temp_sz > 0){
15567 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 15568 For default multidimensional global work size, leave the global_work_size vector empty, \ 15569 and set multi_dimensional to true. Setting the global work size based on the values inside \ 15570 the global_work_size vector.");
15574 work_dimension = 1;
15576 else if (temp_sz == 2){
15579 work_dimension = 2;
15586 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15593 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15595 clWaitForEvents(1, &gpuExec);
15597 int *result = (
int *) malloc(typesz);
15598 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15600 v->assign(result, result+sz);
15602 if (typesz2 != typesz or sz != sz2){
15604 result2 = (
float *) malloc(typesz2);
15605 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
15607 v2->assign(result2, result2+sz2);
15611 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
15613 v2->assign(result, result+sz2);
15616 if (typesz3 != typesz or sz != sz3){
15618 result3 = (
int *) malloc(typesz3);
15619 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
15621 v3->assign(result3, result3+sz3);
15625 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
15627 v3->assign(result, result+sz3);
15630 clReleaseCommandQueue (queue);
15631 clReleaseMemObject(buffer);
15632 clReleaseMemObject(buffer2);
15633 clReleaseMemObject(buffer3);
15634 clReleaseEvent(gpuExec);
15640 size_t sz = v.size();
15641 size_t sz2 = v2.size();
15642 size_t sz3 = v3.size();
15643 size_t typesz =
sizeof(int) * sz;
15644 size_t typesz2 =
sizeof(float) * sz2;
15645 size_t typesz3 =
sizeof(float) * sz3;
15646 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
15650 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15657 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15661 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15663 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
15665 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
15667 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
15668 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
15669 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
15670 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
15672 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
15674 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
15676 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
15679 size_t size[3] = {sz, sz2, sz3};
15680 size_t work_dimension = 3;
15683 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
15684 work_dimension = 1;
15686 else if(temp_sz > 0){
15688 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 15689 For default multidimensional global work size, leave the global_work_size vector empty, \ 15690 and set multi_dimensional to true. Setting the global work size based on the values inside \ 15691 the global_work_size vector.");
15695 work_dimension = 1;
15697 else if (temp_sz == 2){
15700 work_dimension = 2;
15707 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15714 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15716 clWaitForEvents(1, &gpuExec);
15718 int *result = (
int *) malloc(typesz);
15719 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15721 std::vector<int> res = std::vector<int>();
15722 res.assign(result, result+sz);
15724 clReleaseCommandQueue (queue);
15725 clReleaseMemObject(buffer);
15726 clReleaseMemObject(buffer2);
15727 clReleaseMemObject(buffer3);
15728 clReleaseEvent(gpuExec);
15735 size_t sz = v->size();
15736 size_t sz2 = v2.size();
15737 size_t sz3 = v3.size();
15738 size_t typesz =
sizeof(int) * sz;
15739 size_t typesz2 =
sizeof(float) * sz2;
15740 size_t typesz3 =
sizeof(float) * sz3;
15741 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
15745 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15752 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15756 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15758 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
15760 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
15762 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
15763 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
15764 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
15765 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
15767 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15769 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
15771 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
15774 size_t size[3] = {sz, sz2, sz3};
15775 size_t work_dimension = 3;
15778 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
15779 work_dimension = 1;
15781 else if(temp_sz > 0){
15783 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 15784 For default multidimensional global work size, leave the global_work_size vector empty, \ 15785 and set multi_dimensional to true. Setting the global work size based on the values inside \ 15786 the global_work_size vector.");
15790 work_dimension = 1;
15792 else if (temp_sz == 2){
15795 work_dimension = 2;
15802 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15809 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15811 clWaitForEvents(1, &gpuExec);
15813 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15815 clWaitForEvents(1, &gpuExec);
15817 int *result = (
int *) malloc(typesz);
15818 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15820 v->assign(result, result+sz);
15822 clReleaseCommandQueue (queue);
15823 clReleaseMemObject(buffer);
15824 clReleaseMemObject(buffer2);
15825 clReleaseMemObject(buffer3);
15826 clReleaseEvent(gpuExec);
15831 size_t sz = v->size();
15832 size_t sz2 = v2->size();
15833 size_t sz3 = v3.size();
15834 size_t typesz =
sizeof(int) * sz;
15835 size_t typesz2 =
sizeof(float) * sz2;
15836 size_t typesz3 =
sizeof(float) * sz3;
15837 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
15841 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15848 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15852 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15854 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
15856 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
15858 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
15859 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
15860 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
15861 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
15863 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15865 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
15867 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
15870 size_t size[3] = {sz, sz2, sz3};
15871 size_t work_dimension = 3;
15874 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
15875 work_dimension = 1;
15877 else if(temp_sz > 0){
15879 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 15880 For default multidimensional global work size, leave the global_work_size vector empty, \ 15881 and set multi_dimensional to true. Setting the global work size based on the values inside \ 15882 the global_work_size vector.");
15886 work_dimension = 1;
15888 else if (temp_sz == 2){
15891 work_dimension = 2;
15898 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
15905 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
15907 clWaitForEvents(1, &gpuExec);
15909 int *result = (
int *) malloc(typesz);
15910 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
15912 v->assign(result, result+sz);
15914 if (typesz2 != typesz or sz != sz2){
15916 result2 = (
float *) malloc(typesz2);
15917 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
15919 v2->assign(result2, result2+sz2);
15923 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
15925 v2->assign(result, result+sz2);
15928 clReleaseCommandQueue (queue);
15929 clReleaseMemObject(buffer);
15930 clReleaseMemObject(buffer2);
15931 clReleaseMemObject(buffer3);
15932 clReleaseEvent(gpuExec);
15937 size_t sz = v->size();
15938 size_t sz2 = v2->size();
15939 size_t sz3 = v3->size();
15940 size_t typesz =
sizeof(int) * sz;
15941 size_t typesz2 =
sizeof(float) * sz2;
15942 size_t typesz3 =
sizeof(float) * sz3;
15943 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
15947 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
15954 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
15958 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
15960 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
15962 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
15964 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
15965 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
15966 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
15967 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
15969 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
15971 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
15973 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
15976 size_t size[3] = {sz, sz2, sz3};
15977 size_t work_dimension = 3;
15980 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
15981 work_dimension = 1;
15983 else if(temp_sz > 0){
15985 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 15986 For default multidimensional global work size, leave the global_work_size vector empty, \ 15987 and set multi_dimensional to true. Setting the global work size based on the values inside \ 15988 the global_work_size vector.");
15992 work_dimension = 1;
15994 else if (temp_sz == 2){
15997 work_dimension = 2;
16004 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
16011 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16013 clWaitForEvents(1, &gpuExec);
16015 int *result = (
int *) malloc(typesz);
16016 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16018 v->assign(result, result+sz);
16020 if (typesz2 != typesz or sz != sz2){
16022 result2 = (
float *) malloc(typesz2);
16023 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
16025 v2->assign(result2, result2+sz2);
16029 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
16031 v2->assign(result, result+sz2);
16034 if (typesz3 != typesz or sz != sz3){
16036 result3 = (
float *) malloc(typesz3);
16037 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
16039 v3->assign(result3, result3+sz3);
16043 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
16045 v3->assign(result, result+sz3);
16048 clReleaseCommandQueue (queue);
16049 clReleaseMemObject(buffer);
16050 clReleaseMemObject(buffer2);
16051 clReleaseMemObject(buffer3);
16052 clReleaseEvent(gpuExec);
16058 size_t sz = v.size();
16059 size_t sz2 = v2.size();
16060 size_t sz3 = v3.size();
16061 size_t typesz =
sizeof(int) * sz;
16062 size_t typesz2 =
sizeof(float) * sz2;
16063 size_t typesz3 =
sizeof(double) * sz3;
16064 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
16068 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16075 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
16079 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16081 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
16083 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
16085 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
16086 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
16087 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
16088 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
16090 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
16092 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
16094 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
16097 size_t size[3] = {sz, sz2, sz3};
16098 size_t work_dimension = 3;
16101 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
16102 work_dimension = 1;
16104 else if(temp_sz > 0){
16106 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 16107 For default multidimensional global work size, leave the global_work_size vector empty, \ 16108 and set multi_dimensional to true. Setting the global work size based on the values inside \ 16109 the global_work_size vector.");
16113 work_dimension = 1;
16115 else if (temp_sz == 2){
16118 work_dimension = 2;
16125 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
16132 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16134 clWaitForEvents(1, &gpuExec);
16136 int *result = (
int *) malloc(typesz);
16137 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16139 std::vector<int> res = std::vector<int>();
16140 res.assign(result, result+sz);
16142 clReleaseCommandQueue (queue);
16143 clReleaseMemObject(buffer);
16144 clReleaseMemObject(buffer2);
16145 clReleaseMemObject(buffer3);
16146 clReleaseEvent(gpuExec);
16153 size_t sz = v->size();
16154 size_t sz2 = v2.size();
16155 size_t sz3 = v3.size();
16156 size_t typesz =
sizeof(int) * sz;
16157 size_t typesz2 =
sizeof(float) * sz2;
16158 size_t typesz3 =
sizeof(double) * sz3;
16159 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
16163 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16170 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
16174 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16176 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
16178 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
16180 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
16181 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
16182 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
16183 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
16185 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
16187 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
16189 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
16192 size_t size[3] = {sz, sz2, sz3};
16193 size_t work_dimension = 3;
16196 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
16197 work_dimension = 1;
16199 else if(temp_sz > 0){
16201 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 16202 For default multidimensional global work size, leave the global_work_size vector empty, \ 16203 and set multi_dimensional to true. Setting the global work size based on the values inside \ 16204 the global_work_size vector.");
16208 work_dimension = 1;
16210 else if (temp_sz == 2){
16213 work_dimension = 2;
16220 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
16227 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16229 clWaitForEvents(1, &gpuExec);
16231 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16233 clWaitForEvents(1, &gpuExec);
16235 int *result = (
int *) malloc(typesz);
16236 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16238 v->assign(result, result+sz);
16240 clReleaseCommandQueue (queue);
16241 clReleaseMemObject(buffer);
16242 clReleaseMemObject(buffer2);
16243 clReleaseMemObject(buffer3);
16244 clReleaseEvent(gpuExec);
// Visible remainder of one kernel-launch overload; its signature and a number
// of body lines are not part of this excerpt. Judging by the sizeof() factors,
// v carries int, v2 float and v3 double elements. v and v2 are reached through
// pointers and are overwritten with the data read back from the device.
16249 size_t sz = v->size();
16250 size_t sz2 = v2->size();
16251 size_t sz3 = v3.size();
// Default byte sizes of the three device buffers.
16252 size_t typesz =
sizeof(int) * sz;
16253 size_t typesz2 =
sizeof(float) * sz2;
16254 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
16255 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long or too short; the
// conditions guarding them are outside this excerpt.
16259 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16266 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per argument; no host pointer is supplied at creation.
// NOTE(review): error is overwritten by every call and no check is visible
// here - confirm it is inspected on the lines missing from this excerpt.
// NOTE(review): buffer3 is created CL_MEM_WRITE_ONLY although host data is
// uploaded into it below - confirm the kernel never reads it.
16270 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16272 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
16274 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the three buffers to kernel arguments 0, 1 and 2.
16276 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
16277 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
16278 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties list.
16279 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads of the host data (CL_TRUE makes each call wait).
16281 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
16283 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
16285 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Candidate global work size: one entry per argument length.
16288 size_t size[3] = {sz, sz2, sz3};
// Number of work dimensions, 3 unless one of the branches below lowers it.
// The branch structure is only partly visible in this excerpt.
16289 size_t work_dimension = 3;
// One-dimensional when params is NULL, or when multi_dimensional is false and
// temp_sz is zero.
16292 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
16293 work_dimension = 1;
16295 else if(temp_sz > 0){
16297 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 16298 For default multidimensional global work size, leave the global_work_size vector empty, \ 16299 and set multi_dimensional to true. Setting the global work size based on the values inside \ 16300 the global_work_size vector.");
16304 work_dimension = 1;
16306 else if (temp_sz == 2){
16309 work_dimension = 2;
16316 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its event completes.
16323 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16325 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a host staging area, then replace v's contents.
// NOTE(review): no free(result) is visible in this excerpt - confirm that the
// staging memory is released.
16327 int *result = (
int *) malloc(typesz);
16328 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16330 v->assign(result, result+sz);
// v2 read-back. When its byte size or length differs from v's, a dedicated
// float staging buffer is allocated.
16332 if (typesz2 != typesz or sz != sz2){
16334 result2 = (
float *) malloc(typesz2);
16335 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
16337 v2->assign(result2, result2+sz2);
// Otherwise the int staging buffer is reused.
// NOTE(review): assign() then iterates over int values, so the float bit
// patterns read from the device look like they are converted as integers
// rather than reinterpreted - confirm this is intended.
16341 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
16343 v2->assign(result, result+sz2);
// Release the queue, the three buffers and the kernel event.
16346 clReleaseCommandQueue (queue);
16347 clReleaseMemObject(buffer);
16348 clReleaseMemObject(buffer2);
16349 clReleaseMemObject(buffer3);
16350 clReleaseEvent(gpuExec);
// Visible remainder of one kernel-launch overload; its signature and a number
// of body lines are not part of this excerpt. Judging by the sizeof() factors,
// v carries int, v2 float and v3 double elements. All three are reached through
// pointers and are overwritten with the data read back from the device.
16355 size_t sz = v->size();
16356 size_t sz2 = v2->size();
16357 size_t sz3 = v3->size();
// Default byte sizes of the three device buffers.
16358 size_t typesz =
sizeof(int) * sz;
16359 size_t typesz2 =
sizeof(float) * sz2;
16360 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
16361 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long or too short; the
// conditions guarding them are outside this excerpt.
16365 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16372 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One read/write device buffer per argument; no host pointer is supplied.
// NOTE(review): error is overwritten by every call and no check is visible
// here - confirm it is inspected on the lines missing from this excerpt.
16376 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16378 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
16380 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the three buffers to kernel arguments 0, 1 and 2.
16382 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
16383 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
16384 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties list.
16385 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads of the host data (CL_TRUE makes each call wait).
16387 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
16389 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
16391 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Candidate global work size: one entry per argument length.
16394 size_t size[3] = {sz, sz2, sz3};
// Number of work dimensions, 3 unless one of the branches below lowers it.
// The branch structure is only partly visible in this excerpt.
16395 size_t work_dimension = 3;
// One-dimensional when params is NULL, or when multi_dimensional is false and
// temp_sz is zero.
16398 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
16399 work_dimension = 1;
16401 else if(temp_sz > 0){
16403 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 16404 For default multidimensional global work size, leave the global_work_size vector empty, \ 16405 and set multi_dimensional to true. Setting the global work size based on the values inside \ 16406 the global_work_size vector.");
16410 work_dimension = 1;
16412 else if (temp_sz == 2){
16415 work_dimension = 2;
16422 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its event completes.
16429 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16431 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a host staging area, then replace v's contents.
// NOTE(review): no free(result) is visible in this excerpt - confirm that the
// staging memory is released.
16433 int *result = (
int *) malloc(typesz);
16434 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16436 v->assign(result, result+sz);
// v2 read-back. When its byte size or length differs from v's, a dedicated
// float staging buffer is allocated.
16438 if (typesz2 != typesz or sz != sz2){
16440 result2 = (
float *) malloc(typesz2);
16441 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
16443 v2->assign(result2, result2+sz2);
// Otherwise the int staging buffer is reused.
// NOTE(review): assign() then iterates over int values, so the float bit
// patterns read from the device look like they are converted as integers
// rather than reinterpreted - confirm this is intended.
16447 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
16449 v2->assign(result, result+sz2);
// v3 read-back, same scheme: dedicated double staging buffer when the byte
// size or length differs from v's.
16452 if (typesz3 != typesz or sz != sz3){
16454 result3 = (
double *) malloc(typesz3);
16455 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
16457 v3->assign(result3, result3+sz3);
// Otherwise reuse the int staging buffer. The read must come from buffer3 with
// typesz3 bytes: that is v3's own device data, and in this branch typesz3
// equals typesz, so it fits exactly into result.
16461 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
16463 v3->assign(result, result+sz3);
// Release the queue, the three buffers and the kernel event.
16466 clReleaseCommandQueue (queue);
16467 clReleaseMemObject(buffer);
16468 clReleaseMemObject(buffer2);
16469 clReleaseMemObject(buffer3);
16470 clReleaseEvent(gpuExec);
// Visible remainder of one kernel-launch overload; its signature and a number
// of body lines are not part of this excerpt. Judging by the sizeof() factors,
// v carries int, v2 double and v3 char elements. None of the inputs is
// written to here; the data read back from the device goes into a new vector.
16476 size_t sz = v.size();
16477 size_t sz2 = v2.size();
16478 size_t sz3 = v3.size();
// Default byte sizes of the three device buffers.
16479 size_t typesz =
sizeof(int) * sz;
16480 size_t typesz2 =
sizeof(double) * sz2;
16481 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
16482 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long or too short; the
// conditions guarding them are outside this excerpt.
16486 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16493 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per argument; no host pointer is supplied at creation.
// NOTE(review): error is overwritten by every call and no check is visible
// here - confirm it is inspected on the lines missing from this excerpt.
// NOTE(review): buffer2 and buffer3 are created CL_MEM_WRITE_ONLY although
// host data is uploaded into them below - confirm the kernel never reads them.
16497 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16499 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
16501 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the three buffers to kernel arguments 0, 1 and 2.
16503 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
16504 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
16505 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties list.
16506 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads of the host data (CL_TRUE makes each call wait).
16508 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
16510 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
16512 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Candidate global work size: one entry per argument length.
16515 size_t size[3] = {sz, sz2, sz3};
// Number of work dimensions, 3 unless one of the branches below lowers it.
// The branch structure is only partly visible in this excerpt.
16516 size_t work_dimension = 3;
// One-dimensional when params is NULL, or when multi_dimensional is false and
// temp_sz is zero.
16519 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
16520 work_dimension = 1;
16522 else if(temp_sz > 0){
16524 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 16525 For default multidimensional global work size, leave the global_work_size vector empty, \ 16526 and set multi_dimensional to true. Setting the global work size based on the values inside \ 16527 the global_work_size vector.");
16531 work_dimension = 1;
16533 else if (temp_sz == 2){
16536 work_dimension = 2;
16543 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its event completes.
16550 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16552 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a host staging area.
// NOTE(review): no free(result) is visible in this excerpt - confirm that the
// staging memory is released.
16554 int *result = (
int *) malloc(typesz);
16555 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the first sz ints of the staging area into a fresh vector; presumably
// this is the value returned on a line outside this excerpt.
16557 std::vector<int> res = std::vector<int>();
16558 res.assign(result, result+sz);
// Release the queue, the three buffers and the kernel event.
16560 clReleaseCommandQueue (queue);
16561 clReleaseMemObject(buffer);
16562 clReleaseMemObject(buffer2);
16563 clReleaseMemObject(buffer3);
16564 clReleaseEvent(gpuExec);
// Visible remainder of one kernel-launch overload; its signature and a number
// of body lines are not part of this excerpt. Judging by the sizeof() factors,
// v carries int, v2 double and v3 char elements. Only v is reached through a
// pointer and is overwritten with the data read back from the device.
16571 size_t sz = v->size();
16572 size_t sz2 = v2.size();
16573 size_t sz3 = v3.size();
// Default byte sizes of the three device buffers.
16574 size_t typesz =
sizeof(int) * sz;
16575 size_t typesz2 =
sizeof(double) * sz2;
16576 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
16577 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long or too short; the
// conditions guarding them are outside this excerpt.
16581 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16588 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per argument; no host pointer is supplied at creation.
// NOTE(review): error is overwritten by every call and no check is visible
// here - confirm it is inspected on the lines missing from this excerpt.
// NOTE(review): buffer2 and buffer3 are created CL_MEM_WRITE_ONLY although
// host data is uploaded into them below - confirm the kernel never reads them.
16592 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16594 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
16596 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the three buffers to kernel arguments 0, 1 and 2.
16598 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
16599 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
16600 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties list.
16601 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads of the host data (CL_TRUE makes each call wait).
16603 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
16605 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
16607 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Candidate global work size: one entry per argument length.
16610 size_t size[3] = {sz, sz2, sz3};
// Number of work dimensions, 3 unless one of the branches below lowers it.
// The branch structure is only partly visible in this excerpt.
16611 size_t work_dimension = 3;
// One-dimensional when params is NULL, or when multi_dimensional is false and
// temp_sz is zero.
16614 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
16615 work_dimension = 1;
16617 else if(temp_sz > 0){
16619 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 16620 For default multidimensional global work size, leave the global_work_size vector empty, \ 16621 and set multi_dimensional to true. Setting the global work size based on the values inside \ 16622 the global_work_size vector.");
16626 work_dimension = 1;
16628 else if (temp_sz == 2){
16631 work_dimension = 2;
16638 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its event completes.
16645 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16647 clWaitForEvents(1, &gpuExec);
// A second, identical launch/wait pair. Presumably the two pairs sit in
// alternative branches whose conditions are outside this excerpt - confirm
// the kernel is not meant to run twice.
16649 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16651 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a host staging area, then replace v's contents.
// NOTE(review): no free(result) is visible in this excerpt - confirm that the
// staging memory is released.
16653 int *result = (
int *) malloc(typesz);
16654 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16656 v->assign(result, result+sz);
// Release the queue, the three buffers and the kernel event.
16658 clReleaseCommandQueue (queue);
16659 clReleaseMemObject(buffer);
16660 clReleaseMemObject(buffer2);
16661 clReleaseMemObject(buffer3);
16662 clReleaseEvent(gpuExec);
// Visible remainder of one kernel-launch overload; its signature and a number
// of body lines are not part of this excerpt. Judging by the sizeof() factors,
// v carries int, v2 double and v3 char elements. v and v2 are reached through
// pointers and are overwritten with the data read back from the device.
16667 size_t sz = v->size();
16668 size_t sz2 = v2->size();
16669 size_t sz3 = v3.size();
// Default byte sizes of the three device buffers.
16670 size_t typesz =
sizeof(int) * sz;
16671 size_t typesz2 =
sizeof(double) * sz2;
16672 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
16673 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long or too short; the
// conditions guarding them are outside this excerpt.
16677 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16684 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per argument; no host pointer is supplied at creation.
// NOTE(review): error is overwritten by every call and no check is visible
// here - confirm it is inspected on the lines missing from this excerpt.
// NOTE(review): buffer3 is created CL_MEM_WRITE_ONLY although host data is
// uploaded into it below - confirm the kernel never reads it.
16688 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16690 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
16692 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the three buffers to kernel arguments 0, 1 and 2.
16694 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
16695 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
16696 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties list.
16697 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads of the host data (CL_TRUE makes each call wait).
16699 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
16701 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
16703 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Candidate global work size: one entry per argument length.
16706 size_t size[3] = {sz, sz2, sz3};
// Number of work dimensions, 3 unless one of the branches below lowers it.
// The branch structure is only partly visible in this excerpt.
16707 size_t work_dimension = 3;
// One-dimensional when params is NULL, or when multi_dimensional is false and
// temp_sz is zero.
16710 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
16711 work_dimension = 1;
16713 else if(temp_sz > 0){
16715 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 16716 For default multidimensional global work size, leave the global_work_size vector empty, \ 16717 and set multi_dimensional to true. Setting the global work size based on the values inside \ 16718 the global_work_size vector.");
16722 work_dimension = 1;
16724 else if (temp_sz == 2){
16727 work_dimension = 2;
16734 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its event completes.
16741 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16743 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a host staging area, then replace v's contents.
// NOTE(review): no free(result) is visible in this excerpt - confirm that the
// staging memory is released.
16745 int *result = (
int *) malloc(typesz);
16746 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16748 v->assign(result, result+sz);
// v2 read-back. When its byte size or length differs from v's, a dedicated
// double staging buffer is allocated.
16750 if (typesz2 != typesz or sz != sz2){
16752 result2 = (
double *) malloc(typesz2);
16753 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
16755 v2->assign(result2, result2+sz2);
// Otherwise the int staging buffer is reused.
// NOTE(review): assign() then iterates over int values, so double data read
// from the device would not be reproduced element for element - confirm this
// path is intended for v2.
16759 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
16761 v2->assign(result, result+sz2);
// Release the queue, the three buffers and the kernel event.
16764 clReleaseCommandQueue (queue);
16765 clReleaseMemObject(buffer);
16766 clReleaseMemObject(buffer2);
16767 clReleaseMemObject(buffer3);
16768 clReleaseEvent(gpuExec);
// Visible remainder of one kernel-launch overload; its signature and a number
// of body lines are not part of this excerpt. Judging by the sizeof() factors,
// v carries int, v2 double and v3 char elements. All three are reached through
// pointers and are overwritten with the data read back from the device.
16773 size_t sz = v->size();
16774 size_t sz2 = v2->size();
16775 size_t sz3 = v3->size();
// Default byte sizes of the three device buffers.
16776 size_t typesz =
sizeof(int) * sz;
16777 size_t typesz2 =
sizeof(double) * sz2;
16778 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
16779 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long or too short; the
// conditions guarding them are outside this excerpt.
16783 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16790 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One read/write device buffer per argument; no host pointer is supplied.
// NOTE(review): error is overwritten by every call and no check is visible
// here - confirm it is inspected on the lines missing from this excerpt.
16794 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16796 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
16798 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the three buffers to kernel arguments 0, 1 and 2.
16800 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
16801 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
16802 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties list.
16803 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads of the host data (CL_TRUE makes each call wait).
16805 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
16807 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
16809 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Candidate global work size: one entry per argument length.
16812 size_t size[3] = {sz, sz2, sz3};
// Number of work dimensions, 3 unless one of the branches below lowers it.
// The branch structure is only partly visible in this excerpt.
16813 size_t work_dimension = 3;
// One-dimensional when params is NULL, or when multi_dimensional is false and
// temp_sz is zero.
16816 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
16817 work_dimension = 1;
16819 else if(temp_sz > 0){
16821 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 16822 For default multidimensional global work size, leave the global_work_size vector empty, \ 16823 and set multi_dimensional to true. Setting the global work size based on the values inside \ 16824 the global_work_size vector.");
16828 work_dimension = 1;
16830 else if (temp_sz == 2){
16833 work_dimension = 2;
16840 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its event completes.
16847 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16849 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a host staging area, then replace v's contents.
// NOTE(review): no free(result) is visible in this excerpt - confirm that the
// staging memory is released.
16851 int *result = (
int *) malloc(typesz);
16852 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
16854 v->assign(result, result+sz);
// v2 read-back. When its byte size or length differs from v's, a dedicated
// double staging buffer is allocated.
16856 if (typesz2 != typesz or sz != sz2){
16858 result2 = (
double *) malloc(typesz2);
16859 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
16861 v2->assign(result2, result2+sz2);
// Otherwise the int staging buffer is reused.
// NOTE(review): assign() then iterates over int values, so double data read
// from the device would not be reproduced element for element - confirm this
// path is intended for v2.
16865 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
16867 v2->assign(result, result+sz2);
// v3 read-back, same scheme: dedicated char staging buffer when the byte size
// or length differs from v's.
16870 if (typesz3 != typesz or sz != sz3){
16872 result3 = (
char *) malloc(typesz3);
16873 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
16875 v3->assign(result3, result3+sz3);
// Otherwise reuse the int staging buffer. The read must come from buffer3 with
// typesz3 bytes: that is v3's own device data, and in this branch typesz3
// equals typesz, so it fits exactly into result.
16879 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
16881 v3->assign(result, result+sz3);
// Release the queue, the three buffers and the kernel event.
16884 clReleaseCommandQueue (queue);
16885 clReleaseMemObject(buffer);
16886 clReleaseMemObject(buffer2);
16887 clReleaseMemObject(buffer3);
16888 clReleaseEvent(gpuExec);
// Visible remainder of one kernel-launch overload; its signature and a number
// of body lines are not part of this excerpt. Judging by the sizeof() factors,
// v carries int, v2 double and v3 int elements. None of the inputs is written
// to here; the data read back from the device goes into a new vector.
16894 size_t sz = v.size();
16895 size_t sz2 = v2.size();
16896 size_t sz3 = v3.size();
// Default byte sizes of the three device buffers.
16897 size_t typesz =
sizeof(int) * sz;
16898 size_t typesz2 =
sizeof(double) * sz2;
16899 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
16900 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long or too short; the
// conditions guarding them are outside this excerpt.
16904 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
16911 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per argument; no host pointer is supplied at creation.
// NOTE(review): error is overwritten by every call and no check is visible
// here - confirm it is inspected on the lines missing from this excerpt.
// NOTE(review): buffer2 and buffer3 are created CL_MEM_WRITE_ONLY although
// host data is uploaded into them below - confirm the kernel never reads them.
16915 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
16917 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
16919 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the three buffers to kernel arguments 0, 1 and 2.
16921 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
16922 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
16923 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties list.
16924 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads of the host data (CL_TRUE makes each call wait).
16926 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
16928 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
16930 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Candidate global work size: one entry per argument length.
16933 size_t size[3] = {sz, sz2, sz3};
// Number of work dimensions, 3 unless one of the branches below lowers it.
// The branch structure is only partly visible in this excerpt.
16934 size_t work_dimension = 3;
// One-dimensional when params is NULL, or when multi_dimensional is false and
// temp_sz is zero.
16937 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
16938 work_dimension = 1;
16940 else if(temp_sz > 0){
16942 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 16943 For default multidimensional global work size, leave the global_work_size vector empty, \ 16944 and set multi_dimensional to true. Setting the global work size based on the values inside \ 16945 the global_work_size vector.");
16949 work_dimension = 1;
16951 else if (temp_sz == 2){
16954 work_dimension = 2;
16961 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its event completes.
16968 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
16970 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a host staging area.
// NOTE(review): no free(result) is visible in this excerpt - confirm that the
// staging memory is released.
16972 int *result = (
int *) malloc(typesz);
16973 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the first sz ints of the staging area into a fresh vector; presumably
// this is the value returned on a line outside this excerpt.
16975 std::vector<int> res = std::vector<int>();
16976 res.assign(result, result+sz);
// Release the queue, the three buffers and the kernel event.
16978 clReleaseCommandQueue (queue);
16979 clReleaseMemObject(buffer);
16980 clReleaseMemObject(buffer2);
16981 clReleaseMemObject(buffer3);
16982 clReleaseEvent(gpuExec);
// Visible remainder of one kernel-launch overload; its signature and a number
// of body lines are not part of this excerpt. Judging by the sizeof() factors,
// v carries int, v2 double and v3 int elements. Only v is reached through a
// pointer and is overwritten with the data read back from the device.
16989 size_t sz = v->size();
16990 size_t sz2 = v2.size();
16991 size_t sz3 = v3.size();
// Default byte sizes of the three device buffers.
16992 size_t typesz =
sizeof(int) * sz;
16993 size_t typesz2 =
sizeof(double) * sz2;
16994 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
16995 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long or too short; the
// conditions guarding them are outside this excerpt.
16999 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17006 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per argument; no host pointer is supplied at creation.
// NOTE(review): error is overwritten by every call and no check is visible
// here - confirm it is inspected on the lines missing from this excerpt.
// NOTE(review): buffer2 and buffer3 are created CL_MEM_WRITE_ONLY although
// host data is uploaded into them below - confirm the kernel never reads them.
17010 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17012 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
17014 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the three buffers to kernel arguments 0, 1 and 2.
17016 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
17017 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
17018 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties list.
17019 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads of the host data (CL_TRUE makes each call wait).
17021 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17023 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
17025 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Candidate global work size: one entry per argument length.
17028 size_t size[3] = {sz, sz2, sz3};
// Number of work dimensions, 3 unless one of the branches below lowers it.
// The branch structure is only partly visible in this excerpt.
17029 size_t work_dimension = 3;
// One-dimensional when params is NULL, or when multi_dimensional is false and
// temp_sz is zero.
17032 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
17033 work_dimension = 1;
17035 else if(temp_sz > 0){
17037 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 17038 For default multidimensional global work size, leave the global_work_size vector empty, \ 17039 and set multi_dimensional to true. Setting the global work size based on the values inside \ 17040 the global_work_size vector.");
17044 work_dimension = 1;
17046 else if (temp_sz == 2){
17049 work_dimension = 2;
17056 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its event completes.
17063 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17065 clWaitForEvents(1, &gpuExec);
// A second, identical launch/wait pair. Presumably the two pairs sit in
// alternative branches whose conditions are outside this excerpt - confirm
// the kernel is not meant to run twice.
17067 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17069 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a host staging area, then replace v's contents.
// NOTE(review): no free(result) is visible in this excerpt - confirm that the
// staging memory is released.
17071 int *result = (
int *) malloc(typesz);
17072 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17074 v->assign(result, result+sz);
// Release the queue, the three buffers and the kernel event.
17076 clReleaseCommandQueue (queue);
17077 clReleaseMemObject(buffer);
17078 clReleaseMemObject(buffer2);
17079 clReleaseMemObject(buffer3);
17080 clReleaseEvent(gpuExec);
// Visible remainder of one kernel-launch overload; its signature and a number
// of body lines are not part of this excerpt. Judging by the sizeof() factors,
// v carries int, v2 double and v3 int elements. v and v2 are reached through
// pointers and are overwritten with the data read back from the device.
17085 size_t sz = v->size();
17086 size_t sz2 = v2->size();
17087 size_t sz3 = v3.size();
// Default byte sizes of the three device buffers.
17088 size_t typesz =
sizeof(int) * sz;
17089 size_t typesz2 =
sizeof(double) * sz2;
17090 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
17091 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long or too short; the
// conditions guarding them are outside this excerpt.
17095 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17102 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per argument; no host pointer is supplied at creation.
// NOTE(review): error is overwritten by every call and no check is visible
// here - confirm it is inspected on the lines missing from this excerpt.
// NOTE(review): buffer3 is created CL_MEM_WRITE_ONLY although host data is
// uploaded into it below - confirm the kernel never reads it.
17106 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17108 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
17110 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the three buffers to kernel arguments 0, 1 and 2.
17112 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
17113 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
17114 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties list.
17115 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads of the host data (CL_TRUE makes each call wait).
17117 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17119 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
17121 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Candidate global work size: one entry per argument length.
17124 size_t size[3] = {sz, sz2, sz3};
// Number of work dimensions, 3 unless one of the branches below lowers it.
// The branch structure is only partly visible in this excerpt.
17125 size_t work_dimension = 3;
// One-dimensional when params is NULL, or when multi_dimensional is false and
// temp_sz is zero.
17128 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
17129 work_dimension = 1;
17131 else if(temp_sz > 0){
17133 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 17134 For default multidimensional global work size, leave the global_work_size vector empty, \ 17135 and set multi_dimensional to true. Setting the global work size based on the values inside \ 17136 the global_work_size vector.");
17140 work_dimension = 1;
17142 else if (temp_sz == 2){
17145 work_dimension = 2;
17152 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its event completes.
17159 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17161 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a host staging area, then replace v's contents.
// NOTE(review): no free(result) is visible in this excerpt - confirm that the
// staging memory is released.
17163 int *result = (
int *) malloc(typesz);
17164 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17166 v->assign(result, result+sz);
// v2 read-back. When its byte size or length differs from v's, a dedicated
// double staging buffer is allocated.
17168 if (typesz2 != typesz or sz != sz2){
17170 result2 = (
double *) malloc(typesz2);
17171 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
17173 v2->assign(result2, result2+sz2);
// Otherwise the int staging buffer is reused.
// NOTE(review): assign() then iterates over int values, so double data read
// from the device would not be reproduced element for element - confirm this
// path is intended for v2.
17177 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
17179 v2->assign(result, result+sz2);
// Release the queue, the three buffers and the kernel event.
17182 clReleaseCommandQueue (queue);
17183 clReleaseMemObject(buffer);
17184 clReleaseMemObject(buffer2);
17185 clReleaseMemObject(buffer3);
17186 clReleaseEvent(gpuExec);
// Visible remainder of one kernel-launch overload; its signature and a number
// of body lines are not part of this excerpt. Judging by the sizeof() factors,
// v carries int, v2 double and v3 int elements. All three are reached through
// pointers and are overwritten with the data read back from the device.
17191 size_t sz = v->size();
17192 size_t sz2 = v2->size();
17193 size_t sz3 = v3->size();
// Default byte sizes of the three device buffers.
17194 size_t typesz =
sizeof(int) * sz;
17195 size_t typesz2 =
sizeof(double) * sz2;
17196 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
17197 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long or too short; the
// conditions guarding them are outside this excerpt.
17201 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17208 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One read/write device buffer per argument; no host pointer is supplied.
// NOTE(review): error is overwritten by every call and no check is visible
// here - confirm it is inspected on the lines missing from this excerpt.
17212 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17214 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
17216 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the three buffers to kernel arguments 0, 1 and 2.
17218 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
17219 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
17220 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties list.
17221 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads of the host data (CL_TRUE makes each call wait).
17223 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17225 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
17227 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Candidate global work size: one entry per argument length.
17230 size_t size[3] = {sz, sz2, sz3};
// Number of work dimensions, 3 unless one of the branches below lowers it.
// The branch structure is only partly visible in this excerpt.
17231 size_t work_dimension = 3;
// One-dimensional when params is NULL, or when multi_dimensional is false and
// temp_sz is zero.
17234 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
17235 work_dimension = 1;
17237 else if(temp_sz > 0){
17239 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 17240 For default multidimensional global work size, leave the global_work_size vector empty, \ 17241 and set multi_dimensional to true. Setting the global work size based on the values inside \ 17242 the global_work_size vector.");
17246 work_dimension = 1;
17248 else if (temp_sz == 2){
17251 work_dimension = 2;
17258 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its event completes.
17265 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17267 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a host staging area, then replace v's contents.
// NOTE(review): no free(result) is visible in this excerpt - confirm that the
// staging memory is released.
17269 int *result = (
int *) malloc(typesz);
17270 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17272 v->assign(result, result+sz);
// v2 read-back. When its byte size or length differs from v's, a dedicated
// double staging buffer is allocated.
17274 if (typesz2 != typesz or sz != sz2){
17276 result2 = (
double *) malloc(typesz2);
17277 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
17279 v2->assign(result2, result2+sz2);
// Otherwise the int staging buffer is reused.
// NOTE(review): assign() then iterates over int values, so double data read
// from the device would not be reproduced element for element - confirm this
// path is intended for v2.
17283 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
17285 v2->assign(result, result+sz2);
// v3 read-back, same scheme: dedicated int staging buffer when the byte size
// or length differs from v's.
17288 if (typesz3 != typesz or sz != sz3){
17290 result3 = (
int *) malloc(typesz3);
17291 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
17293 v3->assign(result3, result3+sz3);
// Otherwise reuse the int staging buffer. The read must come from buffer3 with
// typesz3 bytes: that is v3's own device data, and in this branch typesz3
// equals typesz, so it fits exactly into result. Reading buffer2 with typesz2
// here would hand v3 the wrong data and could write past the end of result.
17297 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
17299 v3->assign(result, result+sz3);
// Release the queue, the three buffers and the kernel event.
17302 clReleaseCommandQueue (queue);
17303 clReleaseMemObject(buffer);
17304 clReleaseMemObject(buffer2);
17305 clReleaseMemObject(buffer3);
17306 clReleaseEvent(gpuExec);
// Visible remainder of one kernel-launch overload; its signature and a number
// of body lines are not part of this excerpt. Judging by the sizeof() factors,
// v carries int, v2 double and v3 float elements. None of the inputs is
// written to here; the data read back from the device goes into a new vector.
17312 size_t sz = v.size();
17313 size_t sz2 = v2.size();
17314 size_t sz3 = v3.size();
// Default byte sizes of the three device buffers.
17315 size_t typesz =
sizeof(int) * sz;
17316 size_t typesz2 =
sizeof(double) * sz2;
17317 size_t typesz3 =
sizeof(float) * sz3;
// Number of entries in params->buffers_size; 0 when params is NULL.
17318 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings for a buffers_size list that is too long or too short; the
// conditions guarding them are outside this excerpt.
17322 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17329 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per argument; no host pointer is supplied at creation.
// NOTE(review): error is overwritten by every call and no check is visible
// here - confirm it is inspected on the lines missing from this excerpt.
// NOTE(review): buffer2 and buffer3 are created CL_MEM_WRITE_ONLY although
// host data is uploaded into them below - confirm the kernel never reads them.
17333 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17335 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
17337 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the three buffers to kernel arguments 0, 1 and 2.
17339 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
17340 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
17341 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties list.
17342 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking uploads of the host data (CL_TRUE makes each call wait).
17344 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
17346 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
17348 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Candidate global work size: one entry per argument length.
17351 size_t size[3] = {sz, sz2, sz3};
// Number of work dimensions, 3 unless one of the branches below lowers it.
// The branch structure is only partly visible in this excerpt.
17352 size_t work_dimension = 3;
// One-dimensional when params is NULL, or when multi_dimensional is false and
// temp_sz is zero.
17355 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
17356 work_dimension = 1;
17358 else if(temp_sz > 0){
17360 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 17361 For default multidimensional global work size, leave the global_work_size vector empty, \ 17362 and set multi_dimensional to true. Setting the global work size based on the values inside \ 17363 the global_work_size vector.");
17367 work_dimension = 1;
17369 else if (temp_sz == 2){
17372 work_dimension = 2;
17379 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its event completes.
17386 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17388 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a host staging area.
// NOTE(review): no free(result) is visible in this excerpt - confirm that the
// staging memory is released.
17390 int *result = (
int *) malloc(typesz);
17391 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the first sz ints of the staging area into a fresh vector; presumably
// this is the value returned on a line outside this excerpt.
17393 std::vector<int> res = std::vector<int>();
17394 res.assign(result, result+sz);
// Release the queue, the three buffers and the kernel event.
17396 clReleaseCommandQueue (queue);
17397 clReleaseMemObject(buffer);
17398 clReleaseMemObject(buffer2);
17399 clReleaseMemObject(buffer3);
17400 clReleaseEvent(gpuExec);
// Listing fragment (embedded numbers 17407-17498): uploads three host vectors
// (v through a pointer, v2 and v3 as objects), runs `kernel` on them and copies
// the first device buffer back into *v. The embedded numbers skip values, so the
// signature, braces and any other statements are not visible in this listing.
// Element counts of the three host vectors.
17407 size_t sz = v->size();
17408 size_t sz2 = v2.size();
17409 size_t sz3 = v3.size();
// Buffer sizes in bytes: element size (int / double / float) times element count.
17410 size_t typesz =
sizeof(int) * sz;
17411 size_t typesz2 =
sizeof(double) * sz2;
17412 size_t typesz3 =
sizeof(float) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
17413 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about a buffers_size vector that does not hold exactly three entries
// (the conditions guarding them are not visible here).
17417 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17424 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector. Each call stores its status in `error`; no check
// of `error` is visible in this listing.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY yet are filled from v2/v3
// below; if the kernel reads them, a readable flag would be expected - confirm.
17428 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17430 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
17432 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
17434 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
17435 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
17436 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL (default) properties.
17437 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; v->at(0) throws if *v is empty.
17439 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17441 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
17443 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes; three dimensions unless narrowed below.
17446 size_t size[3] = {sz, sz2, sz3};
17447 size_t work_dimension = 3;
// Pick the NDRange dimensionality from params (branch bodies only partly visible).
17450 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
17451 work_dimension = 1;
17453 else if(temp_sz > 0){
17455 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 17456 For default multidimensional global work size, leave the global_work_size vector empty, \ 17457 and set multi_dimensional to true. Setting the global work size based on the values inside \ 17458 the global_work_size vector.");
17462 work_dimension = 1;
17464 else if (temp_sz == 2){
17467 work_dimension = 2;
17474 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel with no explicit local work size and wait for it to finish.
17481 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17483 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and waited on a second time, overwriting
// gpuExec. No clReleaseEvent for the first event is visible in between - confirm
// the repeated launch is intended and that the first event is not leaked.
17485 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17487 clWaitForEvents(1, &gpuExec);
// Blocking read of the first buffer into a temporary host array, then copy into *v.
// NOTE(review): no free(result) is visible in this listing - confirm it is released.
17489 int *result = (
int *) malloc(typesz);
17490 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17492 v->assign(result, result+sz);
// Release the OpenCL objects created above.
17494 clReleaseCommandQueue (queue);
17495 clReleaseMemObject(buffer);
17496 clReleaseMemObject(buffer2);
17497 clReleaseMemObject(buffer3);
17498 clReleaseEvent(gpuExec);
// Listing fragment (embedded numbers 17503-17604): uploads three host vectors
// (v and v2 through pointers, v3 as an object), runs `kernel` once and copies the
// first two device buffers back into *v and *v2. The embedded numbers skip values,
// so the signature, braces and any other statements are not visible here.
// Element counts of the three host vectors.
17503 size_t sz = v->size();
17504 size_t sz2 = v2->size();
17505 size_t sz3 = v3.size();
// Buffer sizes in bytes: element size (int / double / float) times element count.
17506 size_t typesz =
sizeof(int) * sz;
17507 size_t typesz2 =
sizeof(double) * sz2;
17508 size_t typesz3 =
sizeof(float) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
17509 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about a buffers_size vector that does not hold exactly three entries
// (the conditions guarding them are not visible here).
17513 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17520 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector; the two that are read back are READ_WRITE.
// Each call stores its status in `error`; no check of it is visible here.
17524 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17526 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
17528 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
17530 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
17531 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
17532 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL (default) properties.
17533 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; at(0) throws on an empty vector.
17535 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17537 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
17539 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes; three dimensions unless narrowed below.
17542 size_t size[3] = {sz, sz2, sz3};
17543 size_t work_dimension = 3;
// Pick the NDRange dimensionality from params (branch bodies only partly visible).
17546 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
17547 work_dimension = 1;
17549 else if(temp_sz > 0){
17551 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 17552 For default multidimensional global work size, leave the global_work_size vector empty, \ 17553 and set multi_dimensional to true. Setting the global work size based on the values inside \ 17554 the global_work_size vector.");
17558 work_dimension = 1;
17560 else if (temp_sz == 2){
17563 work_dimension = 2;
17570 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel with no explicit local work size and wait for it to finish.
17577 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17579 clWaitForEvents(1, &gpuExec);
// Blocking read of the first buffer into a temporary host array, then copy into *v.
// NOTE(review): no free() of the temporary arrays is visible in this listing.
17581 int *result = (
int *) malloc(typesz);
17582 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17584 v->assign(result, result+sz);
// When the second buffer differs in byte size or element count from the first,
// it is read into its own double array before being copied into *v2.
17586 if (typesz2 != typesz or sz != sz2){
17588 result2 = (
double *) malloc(typesz2);
17589 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
17591 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the opposite branch. It reuses `result`, which is an
// int array, so *v2 is filled by int-to-double element conversion rather than
// from the raw double data - confirm this path is intended/reachable.
17595 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
17597 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
17600 clReleaseCommandQueue (queue);
17601 clReleaseMemObject(buffer);
17602 clReleaseMemObject(buffer2);
17603 clReleaseMemObject(buffer3);
17604 clReleaseEvent(gpuExec);
// Listing fragment (embedded numbers 17609-17724): uploads three host vectors
// (all through pointers), runs `kernel` once and copies all three device buffers
// back into *v, *v2 and *v3. The embedded numbers skip values, so the signature,
// braces and any other statements are not visible in this listing.
// Element counts of the three host vectors.
17609 size_t sz = v->size();
17610 size_t sz2 = v2->size();
17611 size_t sz3 = v3->size();
// Buffer sizes in bytes: element size (int / double / float) times element count.
17612 size_t typesz =
sizeof(int) * sz;
17613 size_t typesz2 =
sizeof(double) * sz2;
17614 size_t typesz3 =
sizeof(float) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
17615 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about a buffers_size vector that does not hold exactly three entries
// (the conditions guarding them are not visible here).
17619 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17626 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One READ_WRITE device buffer per vector, since all three are read back.
// Each call stores its status in `error`; no check of it is visible here.
17630 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17632 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
17634 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
17636 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
17637 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
17638 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL (default) properties.
17639 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; at(0) throws on an empty vector.
17641 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17643 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
17645 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Per-dimension global work sizes; three dimensions unless narrowed below.
17648 size_t size[3] = {sz, sz2, sz3};
17649 size_t work_dimension = 3;
// Pick the NDRange dimensionality from params (branch bodies only partly visible).
17652 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
17653 work_dimension = 1;
17655 else if(temp_sz > 0){
17657 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 17658 For default multidimensional global work size, leave the global_work_size vector empty, \ 17659 and set multi_dimensional to true. Setting the global work size based on the values inside \ 17660 the global_work_size vector.");
17664 work_dimension = 1;
17666 else if (temp_sz == 2){
17669 work_dimension = 2;
17676 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel with no explicit local work size and wait for it to finish.
17683 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17685 clWaitForEvents(1, &gpuExec);
// Blocking read of the first buffer into a temporary host array, then copy into *v.
// NOTE(review): no free() of the temporary arrays is visible in this listing.
17687 int *result = (
int *) malloc(typesz);
17688 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17690 v->assign(result, result+sz);
// When the second buffer differs in byte size or element count from the first,
// it is read into its own double array before being copied into *v2.
17692 if (typesz2 != typesz or sz != sz2){
17694 result2 = (
double *) malloc(typesz2);
17695 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
17697 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the opposite branch. It reuses the int array `result`,
// so *v2 is filled by int-to-double element conversion - confirm intent.
17701 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
17703 v2->assign(result, result+sz2);
// Same scheme for the third buffer: a dedicated float array when sizes differ.
17706 if (typesz3 != typesz or sz != sz3){
17708 result3 = (
float *) malloc(typesz3);
17709 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
17711 v3->assign(result3, result3+sz3);
// Reuse of `result` for the third vector (presumably when typesz3 == typesz and
// sz == sz3, i.e. the array is exactly large enough). This must read buffer3 with
// typesz3 bytes: the previous buffer2/typesz2 read copied the wrong buffer and
// could write sizeof(double)*sz2 bytes into an array of sizeof(int)*sz bytes.
// The bytes are float data, so they are viewed as float when filling *v3.
17715 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
17717 v3->assign(reinterpret_cast<float *>(result), reinterpret_cast<float *>(result)+sz3);
// Release the OpenCL objects created above.
17720 clReleaseCommandQueue (queue);
17721 clReleaseMemObject(buffer);
17722 clReleaseMemObject(buffer2);
17723 clReleaseMemObject(buffer3);
17724 clReleaseEvent(gpuExec);
// Listing fragment (embedded numbers 17730-17818): uploads three host vectors
// (all accessed as objects), runs `kernel` once and copies the first device buffer
// into a local vector `res`. The embedded numbers skip values, so the signature,
// braces and any other statements (e.g. a return) are not visible in this listing.
// Element counts of the three host vectors.
17730 size_t sz = v.size();
17731 size_t sz2 = v2.size();
17732 size_t sz3 = v3.size();
// Buffer sizes in bytes: element size (int / double / double) times element count.
17733 size_t typesz =
sizeof(int) * sz;
17734 size_t typesz2 =
sizeof(double) * sz2;
17735 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
17736 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about a buffers_size vector that does not hold exactly three entries
// (the conditions guarding them are not visible here).
17740 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17747 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector. Each call stores its status in `error`; no check
// of `error` is visible in this listing.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY yet are filled from v2/v3
// below; if the kernel reads them, a readable flag would be expected - confirm.
17751 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17753 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
17755 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
17757 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
17758 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
17759 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL (default) properties.
17760 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
17762 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
17764 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
17766 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes; three dimensions unless narrowed below.
17769 size_t size[3] = {sz, sz2, sz3};
17770 size_t work_dimension = 3;
// Pick the NDRange dimensionality from params (branch bodies only partly visible).
17773 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
17774 work_dimension = 1;
17776 else if(temp_sz > 0){
17778 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 17779 For default multidimensional global work size, leave the global_work_size vector empty, \ 17780 and set multi_dimensional to true. Setting the global work size based on the values inside \ 17781 the global_work_size vector.");
17785 work_dimension = 1;
17787 else if (temp_sz == 2){
17790 work_dimension = 2;
17797 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel with no explicit local work size and wait for it to finish.
17804 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17806 clWaitForEvents(1, &gpuExec);
// Blocking read of the first buffer into a temporary host array.
// NOTE(review): no free(result) is visible in this listing - confirm it is released.
17808 int *result = (
int *) malloc(typesz);
17809 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the sz results into a fresh vector (presumably the value handed back to the caller).
17811 std::vector<int> res = std::vector<int>();
17812 res.assign(result, result+sz);
// Release the OpenCL objects created above.
17814 clReleaseCommandQueue (queue);
17815 clReleaseMemObject(buffer);
17816 clReleaseMemObject(buffer2);
17817 clReleaseMemObject(buffer3);
17818 clReleaseEvent(gpuExec);
// Listing fragment (embedded numbers 17825-17916): uploads three host vectors
// (v through a pointer, v2 and v3 as objects), runs `kernel` on them and copies
// the first device buffer back into *v. The embedded numbers skip values, so the
// signature, braces and any other statements are not visible in this listing.
// Element counts of the three host vectors.
17825 size_t sz = v->size();
17826 size_t sz2 = v2.size();
17827 size_t sz3 = v3.size();
// Buffer sizes in bytes: element size (int / double / double) times element count.
17828 size_t typesz =
sizeof(int) * sz;
17829 size_t typesz2 =
sizeof(double) * sz2;
17830 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
17831 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about a buffers_size vector that does not hold exactly three entries
// (the conditions guarding them are not visible here).
17835 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17842 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector. Each call stores its status in `error`; no check
// of `error` is visible in this listing.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY yet are filled from v2/v3
// below; if the kernel reads them, a readable flag would be expected - confirm.
17846 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17848 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
17850 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
17852 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
17853 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
17854 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL (default) properties.
17855 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; v->at(0) throws if *v is empty.
17857 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17859 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
17861 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes; three dimensions unless narrowed below.
17864 size_t size[3] = {sz, sz2, sz3};
17865 size_t work_dimension = 3;
// Pick the NDRange dimensionality from params (branch bodies only partly visible).
17868 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
17869 work_dimension = 1;
17871 else if(temp_sz > 0){
17873 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 17874 For default multidimensional global work size, leave the global_work_size vector empty, \ 17875 and set multi_dimensional to true. Setting the global work size based on the values inside \ 17876 the global_work_size vector.");
17880 work_dimension = 1;
17882 else if (temp_sz == 2){
17885 work_dimension = 2;
17892 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel with no explicit local work size and wait for it to finish.
17899 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17901 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and waited on a second time, overwriting
// gpuExec. No clReleaseEvent for the first event is visible in between - confirm
// the repeated launch is intended and that the first event is not leaked.
17903 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17905 clWaitForEvents(1, &gpuExec);
// Blocking read of the first buffer into a temporary host array, then copy into *v.
// NOTE(review): no free(result) is visible in this listing - confirm it is released.
17907 int *result = (
int *) malloc(typesz);
17908 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
17910 v->assign(result, result+sz);
// Release the OpenCL objects created above.
17912 clReleaseCommandQueue (queue);
17913 clReleaseMemObject(buffer);
17914 clReleaseMemObject(buffer2);
17915 clReleaseMemObject(buffer3);
17916 clReleaseEvent(gpuExec);
// Listing fragment (embedded numbers 17921-18022): uploads three host vectors
// (v and v2 through pointers, v3 as an object), runs `kernel` once and copies the
// first two device buffers back into *v and *v2. The embedded numbers skip values,
// so the signature, braces and any other statements are not visible here.
// Element counts of the three host vectors.
17921 size_t sz = v->size();
17922 size_t sz2 = v2->size();
17923 size_t sz3 = v3.size();
// Buffer sizes in bytes: element size (int / double / double) times element count.
17924 size_t typesz =
sizeof(int) * sz;
17925 size_t typesz2 =
sizeof(double) * sz2;
17926 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
17927 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about a buffers_size vector that does not hold exactly three entries
// (the conditions guarding them are not visible here).
17931 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
17938 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector; the two that are read back are READ_WRITE.
// Each call stores its status in `error`; no check of it is visible here.
17942 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
17944 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
17946 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
17948 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
17949 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
17950 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL (default) properties.
17951 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; at(0) throws on an empty vector.
17953 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
17955 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
17957 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes; three dimensions unless narrowed below.
17960 size_t size[3] = {sz, sz2, sz3};
17961 size_t work_dimension = 3;
// Pick the NDRange dimensionality from params (branch bodies only partly visible).
17964 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
17965 work_dimension = 1;
17967 else if(temp_sz > 0){
17969 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 17970 For default multidimensional global work size, leave the global_work_size vector empty, \ 17971 and set multi_dimensional to true. Setting the global work size based on the values inside \ 17972 the global_work_size vector.");
17976 work_dimension = 1;
17978 else if (temp_sz == 2){
17981 work_dimension = 2;
17988 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel with no explicit local work size and wait for it to finish.
17995 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
17997 clWaitForEvents(1, &gpuExec);
// Blocking read of the first buffer into a temporary host array, then copy into *v.
// NOTE(review): no free() of the temporary arrays is visible in this listing.
17999 int *result = (
int *) malloc(typesz);
18000 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18002 v->assign(result, result+sz);
// When the second buffer differs in byte size or element count from the first,
// it is read into its own double array before being copied into *v2.
18004 if (typesz2 != typesz or sz != sz2){
18006 result2 = (
double *) malloc(typesz2);
18007 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
18009 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the opposite branch. It reuses `result`, which is an
// int array, so *v2 is filled by int-to-double element conversion rather than
// from the raw double data - confirm this path is intended/reachable.
18013 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
18015 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
18018 clReleaseCommandQueue (queue);
18019 clReleaseMemObject(buffer);
18020 clReleaseMemObject(buffer2);
18021 clReleaseMemObject(buffer3);
18022 clReleaseEvent(gpuExec);
// Listing fragment (embedded numbers 18027-18142): uploads three host vectors
// (all through pointers), runs `kernel` once and copies all three device buffers
// back into *v, *v2 and *v3. The embedded numbers skip values, so the signature,
// braces and any other statements are not visible in this listing.
// Element counts of the three host vectors.
18027 size_t sz = v->size();
18028 size_t sz2 = v2->size();
18029 size_t sz3 = v3->size();
// Buffer sizes in bytes: element size (int / double / double) times element count.
18030 size_t typesz =
sizeof(int) * sz;
18031 size_t typesz2 =
sizeof(double) * sz2;
18032 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
18033 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about a buffers_size vector that does not hold exactly three entries
// (the conditions guarding them are not visible here).
18037 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18044 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One READ_WRITE device buffer per vector, since all three are read back.
// Each call stores its status in `error`; no check of it is visible here.
18048 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18050 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
18052 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
18054 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
18055 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
18056 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL (default) properties.
18057 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; at(0) throws on an empty vector.
18059 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
18061 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
18063 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Per-dimension global work sizes; three dimensions unless narrowed below.
18066 size_t size[3] = {sz, sz2, sz3};
18067 size_t work_dimension = 3;
// Pick the NDRange dimensionality from params (branch bodies only partly visible).
18070 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
18071 work_dimension = 1;
18073 else if(temp_sz > 0){
18075 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 18076 For default multidimensional global work size, leave the global_work_size vector empty, \ 18077 and set multi_dimensional to true. Setting the global work size based on the values inside \ 18078 the global_work_size vector.");
18082 work_dimension = 1;
18084 else if (temp_sz == 2){
18087 work_dimension = 2;
18094 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel with no explicit local work size and wait for it to finish.
18101 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18103 clWaitForEvents(1, &gpuExec);
// Blocking read of the first buffer into a temporary host array, then copy into *v.
// NOTE(review): no free() of the temporary arrays is visible in this listing.
18105 int *result = (
int *) malloc(typesz);
18106 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18108 v->assign(result, result+sz);
// When the second buffer differs in byte size or element count from the first,
// it is read into its own double array before being copied into *v2.
18110 if (typesz2 != typesz or sz != sz2){
18112 result2 = (
double *) malloc(typesz2);
18113 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
18115 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the opposite branch. It reuses the int array `result`,
// so *v2 is filled by int-to-double element conversion - confirm intent.
18119 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
18121 v2->assign(result, result+sz2);
// Same scheme for the third buffer: a dedicated double array when sizes differ.
18124 if (typesz3 != typesz or sz != sz3){
18126 result3 = (
double *) malloc(typesz3);
18127 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
18129 v3->assign(result3, result3+sz3);
// Reuse of `result` for the third vector (presumably when typesz3 == typesz and
// sz == sz3, i.e. the array is exactly large enough). This must read buffer3 with
// typesz3 bytes: the previous buffer2/typesz2 read copied the second vector's
// device data into *v3. The bytes are double data, so they are viewed as double
// when filling *v3.
18133 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
18135 v3->assign(reinterpret_cast<double *>(result), reinterpret_cast<double *>(result)+sz3);
// Release the OpenCL objects created above.
18138 clReleaseCommandQueue (queue);
18139 clReleaseMemObject(buffer);
18140 clReleaseMemObject(buffer2);
18141 clReleaseMemObject(buffer3);
18142 clReleaseEvent(gpuExec);
// Listing fragment (embedded numbers 18148-18236): uploads three host vectors
// (all accessed as objects), runs `kernel` once and copies the first device buffer
// into a local vector `res`. The embedded numbers skip values, so the signature,
// braces and any other statements (e.g. a return) are not visible in this listing.
// Element counts of the three host vectors.
18148 size_t sz = v.size();
18149 size_t sz2 = v2.size();
18150 size_t sz3 = v3.size();
// Buffer sizes in bytes: element size (float / char / char) times element count.
18151 size_t typesz =
sizeof(float) * sz;
18152 size_t typesz2 =
sizeof(char) * sz2;
18153 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
18154 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about a buffers_size vector that does not hold exactly three entries
// (the conditions guarding them are not visible here).
18158 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18165 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector. Each call stores its status in `error`; no check
// of `error` is visible in this listing.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY yet are filled from v2/v3
// below; if the kernel reads them, a readable flag would be expected - confirm.
18169 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18171 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
18173 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
18175 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
18176 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
18177 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL (default) properties.
18178 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
18180 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
18182 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
18184 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes; three dimensions unless narrowed below.
18187 size_t size[3] = {sz, sz2, sz3};
18188 size_t work_dimension = 3;
// Pick the NDRange dimensionality from params (branch bodies only partly visible).
18191 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
18192 work_dimension = 1;
18194 else if(temp_sz > 0){
18196 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 18197 For default multidimensional global work size, leave the global_work_size vector empty, \ 18198 and set multi_dimensional to true. Setting the global work size based on the values inside \ 18199 the global_work_size vector.");
18203 work_dimension = 1;
18205 else if (temp_sz == 2){
18208 work_dimension = 2;
18215 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel with no explicit local work size and wait for it to finish.
18222 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18224 clWaitForEvents(1, &gpuExec);
// Blocking read of the first buffer into a temporary host array.
// NOTE(review): no free(result) is visible in this listing - confirm it is released.
18226 float *result = (
float *) malloc(typesz);
18227 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the sz results into a fresh vector (presumably the value handed back to the caller).
18229 std::vector<float> res = std::vector<float>();
18230 res.assign(result, result+sz);
// Release the OpenCL objects created above.
18232 clReleaseCommandQueue (queue);
18233 clReleaseMemObject(buffer);
18234 clReleaseMemObject(buffer2);
18235 clReleaseMemObject(buffer3);
18236 clReleaseEvent(gpuExec);
// Listing fragment (embedded numbers 18243-18334): uploads three host vectors
// (v through a pointer, v2 and v3 as objects), runs `kernel` on them and copies
// the first device buffer back into *v. The embedded numbers skip values, so the
// signature, braces and any other statements are not visible in this listing.
// Element counts of the three host vectors.
18243 size_t sz = v->size();
18244 size_t sz2 = v2.size();
18245 size_t sz3 = v3.size();
// Buffer sizes in bytes: element size (float / char / char) times element count.
18246 size_t typesz =
sizeof(float) * sz;
18247 size_t typesz2 =
sizeof(char) * sz2;
18248 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
18249 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about a buffers_size vector that does not hold exactly three entries
// (the conditions guarding them are not visible here).
18253 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18260 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector. Each call stores its status in `error`; no check
// of `error` is visible in this listing.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY yet are filled from v2/v3
// below; if the kernel reads them, a readable flag would be expected - confirm.
18264 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18266 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
18268 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
18270 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
18271 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
18272 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL (default) properties.
18273 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; v->at(0) throws if *v is empty.
18275 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
18277 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
18279 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes; three dimensions unless narrowed below.
18282 size_t size[3] = {sz, sz2, sz3};
18283 size_t work_dimension = 3;
// Pick the NDRange dimensionality from params (branch bodies only partly visible).
18286 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
18287 work_dimension = 1;
18289 else if(temp_sz > 0){
18291 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 18292 For default multidimensional global work size, leave the global_work_size vector empty, \ 18293 and set multi_dimensional to true. Setting the global work size based on the values inside \ 18294 the global_work_size vector.");
18298 work_dimension = 1;
18300 else if (temp_sz == 2){
18303 work_dimension = 2;
18310 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel with no explicit local work size and wait for it to finish.
18317 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18319 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and waited on a second time, overwriting
// gpuExec. No clReleaseEvent for the first event is visible in between - confirm
// the repeated launch is intended and that the first event is not leaked.
18321 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18323 clWaitForEvents(1, &gpuExec);
// Blocking read of the first buffer into a temporary host array, then copy into *v.
// NOTE(review): no free(result) is visible in this listing - confirm it is released.
18325 float *result = (
float *) malloc(typesz);
18326 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18328 v->assign(result, result+sz);
// Release the OpenCL objects created above.
18330 clReleaseCommandQueue (queue);
18331 clReleaseMemObject(buffer);
18332 clReleaseMemObject(buffer2);
18333 clReleaseMemObject(buffer3);
18334 clReleaseEvent(gpuExec);
// Listing fragment (embedded numbers 18339-18440): uploads three host vectors
// (v and v2 through pointers, v3 as an object), runs `kernel` once and copies the
// first two device buffers back into *v and *v2. The embedded numbers skip values,
// so the signature, braces and any other statements are not visible here.
// Element counts of the three host vectors.
18339 size_t sz = v->size();
18340 size_t sz2 = v2->size();
18341 size_t sz3 = v3.size();
// Buffer sizes in bytes: element size (float / char / char) times element count.
18342 size_t typesz =
sizeof(float) * sz;
18343 size_t typesz2 =
sizeof(char) * sz2;
18344 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
18345 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about a buffers_size vector that does not hold exactly three entries
// (the conditions guarding them are not visible here).
18349 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18356 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per vector; the two that are read back are READ_WRITE.
// Each call stores its status in `error`; no check of it is visible here.
18360 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18362 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
18364 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
18366 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
18367 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
18368 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL (default) properties.
18369 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; at(0) throws on an empty vector.
18371 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
18373 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
18375 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Per-dimension global work sizes; three dimensions unless narrowed below.
18378 size_t size[3] = {sz, sz2, sz3};
18379 size_t work_dimension = 3;
// Pick the NDRange dimensionality from params (branch bodies only partly visible).
18382 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
18383 work_dimension = 1;
18385 else if(temp_sz > 0){
18387 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 18388 For default multidimensional global work size, leave the global_work_size vector empty, \ 18389 and set multi_dimensional to true. Setting the global work size based on the values inside \ 18390 the global_work_size vector.");
18394 work_dimension = 1;
18396 else if (temp_sz == 2){
18399 work_dimension = 2;
18406 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel with no explicit local work size and wait for it to finish.
18413 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18415 clWaitForEvents(1, &gpuExec);
// Blocking read of the first buffer into a temporary host array, then copy into *v.
// NOTE(review): no free() of the temporary arrays is visible in this listing.
18417 float *result = (
float *) malloc(typesz);
18418 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18420 v->assign(result, result+sz);
// When the second buffer differs in byte size or element count from the first,
// it is read into its own char array before being copied into *v2.
18422 if (typesz2 != typesz or sz != sz2){
18424 result2 = (
char *) malloc(typesz2);
18425 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
18427 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the opposite branch. It reuses `result`, which is a
// float array, so *v2 is filled by float-to-char element conversion rather than
// from the raw char data - confirm this path is intended/reachable.
18431 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
18433 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
18436 clReleaseCommandQueue (queue);
18437 clReleaseMemObject(buffer);
18438 clReleaseMemObject(buffer2);
18439 clReleaseMemObject(buffer3);
18440 clReleaseEvent(gpuExec);
// Listing fragment (embedded numbers 18445-18560): uploads three host vectors
// (all through pointers), runs `kernel` once and copies all three device buffers
// back into *v, *v2 and *v3. The embedded numbers skip values, so the signature,
// braces and any other statements are not visible in this listing.
// Element counts of the three host vectors.
18445 size_t sz = v->size();
18446 size_t sz2 = v2->size();
18447 size_t sz3 = v3->size();
// Buffer sizes in bytes: element size (float / char / char) times element count.
18448 size_t typesz =
sizeof(float) * sz;
18449 size_t typesz2 =
sizeof(char) * sz2;
18450 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
18451 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about a buffers_size vector that does not hold exactly three entries
// (the conditions guarding them are not visible here).
18455 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18462 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One READ_WRITE device buffer per vector, since all three are read back.
// Each call stores its status in `error`; no check of it is visible here.
18466 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18468 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
18470 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
18472 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
18473 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
18474 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with NULL (default) properties.
18475 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; at(0) throws on an empty vector.
18477 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
18479 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
18481 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Per-dimension global work sizes; three dimensions unless narrowed below.
18484 size_t size[3] = {sz, sz2, sz3};
18485 size_t work_dimension = 3;
// Pick the NDRange dimensionality from params (branch bodies only partly visible).
18488 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
18489 work_dimension = 1;
18491 else if(temp_sz > 0){
18493 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 18494 For default multidimensional global work size, leave the global_work_size vector empty, \ 18495 and set multi_dimensional to true. Setting the global work size based on the values inside \ 18496 the global_work_size vector.");
18500 work_dimension = 1;
18502 else if (temp_sz == 2){
18505 work_dimension = 2;
18512 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel with no explicit local work size and wait for it to finish.
18519 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18521 clWaitForEvents(1, &gpuExec);
// Blocking read of the first buffer into a temporary host array, then copy into *v.
// NOTE(review): no free() of the temporary arrays is visible in this listing.
18523 float *result = (
float *) malloc(typesz);
18524 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18526 v->assign(result, result+sz);
// When the second buffer differs in byte size or element count from the first,
// it is read into its own char array before being copied into *v2.
18528 if (typesz2 != typesz or sz != sz2){
18530 result2 = (
char *) malloc(typesz2);
18531 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
18533 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the opposite branch. It reuses the float array
// `result`, so *v2 is filled by float-to-char element conversion - confirm intent.
18537 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
18539 v2->assign(result, result+sz2);
// Same scheme for the third buffer: a dedicated char array when sizes differ.
18542 if (typesz3 != typesz or sz != sz3){
18544 result3 = (
char *) malloc(typesz3);
18545 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
18547 v3->assign(result3, result3+sz3);
// Reuse of `result` for the third vector (presumably when typesz3 == typesz and
// sz == sz3, i.e. the array is exactly large enough). This must read buffer3 with
// typesz3 bytes: the previous buffer2/typesz2 read copied the second vector's
// device data into *v3. The bytes are char data, so they are viewed as char when
// filling *v3.
18551 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
18553 v3->assign(reinterpret_cast<char *>(result), reinterpret_cast<char *>(result)+sz3);
// Release the OpenCL objects created above.
18556 clReleaseCommandQueue (queue);
18557 clReleaseMemObject(buffer);
18558 clReleaseMemObject(buffer2);
18559 clReleaseMemObject(buffer3);
18560 clReleaseEvent(gpuExec);
18566 size_t sz = v.size();
18567 size_t sz2 = v2.size();
18568 size_t sz3 = v3.size();
18569 size_t typesz =
sizeof(float) * sz;
18570 size_t typesz2 =
sizeof(char) * sz2;
18571 size_t typesz3 =
sizeof(int) * sz3;
18572 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
18576 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18583 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
18587 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// buffer2/buffer3 are filled from v2/v3 below and bound as kernel arguments 1 and 2; they are
// never read back on the host, so the kernel is presumably meant to read them. OpenCL leaves
// kernel reads from a CL_MEM_WRITE_ONLY buffer undefined, hence CL_MEM_READ_WRITE (which still
// permits any kernel that writes to them).
18589 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
18591 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
18593 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
18594 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
18595 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
18596 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
18598 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
18600 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
18602 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
18605 size_t size[3] = {sz, sz2, sz3};
18606 size_t work_dimension = 3;
18609 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
18610 work_dimension = 1;
18612 else if(temp_sz > 0){
18614 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 18615 For default multidimensional global work size, leave the global_work_size vector empty, \ 18616 and set multi_dimensional to true. Setting the global work size based on the values inside \ 18617 the global_work_size vector.");
18621 work_dimension = 1;
18623 else if (temp_sz == 2){
18626 work_dimension = 2;
18633 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
18640 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18642 clWaitForEvents(1, &gpuExec);
18644 float *result = (
float *) malloc(typesz);
18645 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18647 std::vector<float> res = std::vector<float>();
18648 res.assign(result, result+sz);
18650 clReleaseCommandQueue (queue);
18651 clReleaseMemObject(buffer);
18652 clReleaseMemObject(buffer2);
18653 clReleaseMemObject(buffer3);
18654 clReleaseEvent(gpuExec);
18661 size_t sz = v->size();
18662 size_t sz2 = v2.size();
18663 size_t sz3 = v3.size();
18664 size_t typesz =
sizeof(float) * sz;
18665 size_t typesz2 =
sizeof(char) * sz2;
18666 size_t typesz3 =
sizeof(int) * sz3;
18667 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
18671 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18678 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
18682 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// buffer2/buffer3 are filled from v2/v3 below and bound as kernel arguments 1 and 2; they are
// never read back on the host, so the kernel is presumably meant to read them. OpenCL leaves
// kernel reads from a CL_MEM_WRITE_ONLY buffer undefined, hence CL_MEM_READ_WRITE (which still
// permits any kernel that writes to them).
18684 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
18686 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
18688 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
18689 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
18690 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
18691 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
18693 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
18695 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
18697 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
18700 size_t size[3] = {sz, sz2, sz3};
18701 size_t work_dimension = 3;
18704 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
18705 work_dimension = 1;
18707 else if(temp_sz > 0){
18709 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 18710 For default multidimensional global work size, leave the global_work_size vector empty, \ 18711 and set multi_dimensional to true. Setting the global work size based on the values inside \ 18712 the global_work_size vector.");
18716 work_dimension = 1;
18718 else if (temp_sz == 2){
18721 work_dimension = 2;
18728 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel exactly once and block until it finishes. Enqueueing a second time into
// gpuExec would overwrite the first event handle (only one clReleaseEvent follows) and would
// run the kernel twice over the same buffers.
18735 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18737 clWaitForEvents(1, &gpuExec);
18743 float *result = (
float *) malloc(typesz);
18744 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18746 v->assign(result, result+sz);
18748 clReleaseCommandQueue (queue);
18749 clReleaseMemObject(buffer);
18750 clReleaseMemObject(buffer2);
18751 clReleaseMemObject(buffer3);
18752 clReleaseEvent(gpuExec);
18757 size_t sz = v->size();
18758 size_t sz2 = v2->size();
18759 size_t sz3 = v3.size();
18760 size_t typesz =
sizeof(float) * sz;
18761 size_t typesz2 =
sizeof(char) * sz2;
18762 size_t typesz3 =
sizeof(int) * sz3;
18763 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
18767 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18774 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
18778 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18780 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
// buffer3 is filled from v3 below and bound as kernel argument 2; it is never read back on the
// host, so the kernel is presumably meant to read it. OpenCL leaves kernel reads from a
// CL_MEM_WRITE_ONLY buffer undefined, hence CL_MEM_READ_WRITE.
18782 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
18784 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
18785 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
18786 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
18787 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
18789 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
18791 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
18793 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
18796 size_t size[3] = {sz, sz2, sz3};
18797 size_t work_dimension = 3;
18800 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
18801 work_dimension = 1;
18803 else if(temp_sz > 0){
18805 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 18806 For default multidimensional global work size, leave the global_work_size vector empty, \ 18807 and set multi_dimensional to true. Setting the global work size based on the values inside \ 18808 the global_work_size vector.");
18812 work_dimension = 1;
18814 else if (temp_sz == 2){
18817 work_dimension = 2;
18824 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
18831 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18833 clWaitForEvents(1, &gpuExec);
18835 float *result = (
float *) malloc(typesz);
18836 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18838 v->assign(result, result+sz);
18840 if (typesz2 != typesz or sz != sz2){
18842 result2 = (
char *) malloc(typesz2);
18843 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
18845 v2->assign(result2, result2+sz2);
18849 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
18851 v2->assign(result, result+sz2);
18854 clReleaseCommandQueue (queue);
18855 clReleaseMemObject(buffer);
18856 clReleaseMemObject(buffer2);
18857 clReleaseMemObject(buffer3);
18858 clReleaseEvent(gpuExec);
18863 size_t sz = v->size();
18864 size_t sz2 = v2->size();
18865 size_t sz3 = v3->size();
18866 size_t typesz =
sizeof(float) * sz;
18867 size_t typesz2 =
sizeof(char) * sz2;
18868 size_t typesz3 =
sizeof(int) * sz3;
18869 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
18873 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
18880 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
18884 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
18886 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
18888 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
18890 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
18891 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
18892 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
18893 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
18895 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
18897 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
18899 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
18902 size_t size[3] = {sz, sz2, sz3};
18903 size_t work_dimension = 3;
18906 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
18907 work_dimension = 1;
18909 else if(temp_sz > 0){
18911 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 18912 For default multidimensional global work size, leave the global_work_size vector empty, \ 18913 and set multi_dimensional to true. Setting the global work size based on the values inside \ 18914 the global_work_size vector.");
18918 work_dimension = 1;
18920 else if (temp_sz == 2){
18923 work_dimension = 2;
18930 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
18937 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
18939 clWaitForEvents(1, &gpuExec);
18941 float *result = (
float *) malloc(typesz);
18942 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
18944 v->assign(result, result+sz);
18946 if (typesz2 != typesz or sz != sz2){
18948 result2 = (
char *) malloc(typesz2);
18949 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
18951 v2->assign(result2, result2+sz2);
18955 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
18957 v2->assign(result, result+sz2);
18960 if (typesz3 != typesz or sz != sz3){
18962 result3 = (
int *) malloc(typesz3);
18963 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
18965 v3->assign(result3, result3+sz3);
// Fallback path of the size check above (presumably its else-branch; that line is not shown):
// typesz3 == typesz and sz == sz3, so the scratch block allocated for v is reused for v3.
// Read buffer3 with its own byte count -- reading buffer2/typesz2 here returned the wrong data
// and could write past the end of result. The bytes are int values (typesz3 is computed with
// sizeof(int)), so view them as int instead of converting each one from float.
18969 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
18971 v3->assign(reinterpret_cast<int *>(result), reinterpret_cast<int *>(result) + sz3);
18974 clReleaseCommandQueue (queue);
18975 clReleaseMemObject(buffer);
18976 clReleaseMemObject(buffer2);
18977 clReleaseMemObject(buffer3);
18978 clReleaseEvent(gpuExec);
18984 size_t sz = v.size();
18985 size_t sz2 = v2.size();
18986 size_t sz3 = v3.size();
18987 size_t typesz =
sizeof(float) * sz;
18988 size_t typesz2 =
sizeof(char) * sz2;
18989 size_t typesz3 =
sizeof(float) * sz3;
18990 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
18994 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19001 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19005 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// buffer2/buffer3 are filled from v2/v3 below and bound as kernel arguments 1 and 2; they are
// never read back on the host, so the kernel is presumably meant to read them. OpenCL leaves
// kernel reads from a CL_MEM_WRITE_ONLY buffer undefined, hence CL_MEM_READ_WRITE (which still
// permits any kernel that writes to them).
19007 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
19009 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
19011 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
19012 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
19013 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
19014 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
19016 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
19018 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
19020 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19023 size_t size[3] = {sz, sz2, sz3};
19024 size_t work_dimension = 3;
19027 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
19028 work_dimension = 1;
19030 else if(temp_sz > 0){
19032 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 19033 For default multidimensional global work size, leave the global_work_size vector empty, \ 19034 and set multi_dimensional to true. Setting the global work size based on the values inside \ 19035 the global_work_size vector.");
19039 work_dimension = 1;
19041 else if (temp_sz == 2){
19044 work_dimension = 2;
19051 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19058 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19060 clWaitForEvents(1, &gpuExec);
19062 float *result = (
float *) malloc(typesz);
19063 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19065 std::vector<float> res = std::vector<float>();
19066 res.assign(result, result+sz);
19068 clReleaseCommandQueue (queue);
19069 clReleaseMemObject(buffer);
19070 clReleaseMemObject(buffer2);
19071 clReleaseMemObject(buffer3);
19072 clReleaseEvent(gpuExec);
19079 size_t sz = v->size();
19080 size_t sz2 = v2.size();
19081 size_t sz3 = v3.size();
19082 size_t typesz =
sizeof(float) * sz;
19083 size_t typesz2 =
sizeof(char) * sz2;
19084 size_t typesz3 =
sizeof(float) * sz3;
19085 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
19089 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19096 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19100 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// buffer2/buffer3 are filled from v2/v3 below and bound as kernel arguments 1 and 2; they are
// never read back on the host, so the kernel is presumably meant to read them. OpenCL leaves
// kernel reads from a CL_MEM_WRITE_ONLY buffer undefined, hence CL_MEM_READ_WRITE (which still
// permits any kernel that writes to them).
19102 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
19104 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
19106 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
19107 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
19108 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
19109 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
19111 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
19113 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
19115 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19118 size_t size[3] = {sz, sz2, sz3};
19119 size_t work_dimension = 3;
19122 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
19123 work_dimension = 1;
19125 else if(temp_sz > 0){
19127 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 19128 For default multidimensional global work size, leave the global_work_size vector empty, \ 19129 and set multi_dimensional to true. Setting the global work size based on the values inside \ 19130 the global_work_size vector.");
19134 work_dimension = 1;
19136 else if (temp_sz == 2){
19139 work_dimension = 2;
19146 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel exactly once and block until it finishes. Enqueueing a second time into
// gpuExec would overwrite the first event handle (only one clReleaseEvent follows) and would
// run the kernel twice over the same buffers.
19153 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19155 clWaitForEvents(1, &gpuExec);
19161 float *result = (
float *) malloc(typesz);
19162 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19164 v->assign(result, result+sz);
19166 clReleaseCommandQueue (queue);
19167 clReleaseMemObject(buffer);
19168 clReleaseMemObject(buffer2);
19169 clReleaseMemObject(buffer3);
19170 clReleaseEvent(gpuExec);
19175 size_t sz = v->size();
19176 size_t sz2 = v2->size();
19177 size_t sz3 = v3.size();
19178 size_t typesz =
sizeof(float) * sz;
19179 size_t typesz2 =
sizeof(char) * sz2;
19180 size_t typesz3 =
sizeof(float) * sz3;
19181 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
19185 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19192 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19196 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19198 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
// buffer3 is filled from v3 below and bound as kernel argument 2; it is never read back on the
// host, so the kernel is presumably meant to read it. OpenCL leaves kernel reads from a
// CL_MEM_WRITE_ONLY buffer undefined, hence CL_MEM_READ_WRITE.
19200 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
19202 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
19203 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
19204 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
19205 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
19207 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
19209 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
19211 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19214 size_t size[3] = {sz, sz2, sz3};
19215 size_t work_dimension = 3;
19218 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
19219 work_dimension = 1;
19221 else if(temp_sz > 0){
19223 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 19224 For default multidimensional global work size, leave the global_work_size vector empty, \ 19225 and set multi_dimensional to true. Setting the global work size based on the values inside \ 19226 the global_work_size vector.");
19230 work_dimension = 1;
19232 else if (temp_sz == 2){
19235 work_dimension = 2;
19242 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19249 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19251 clWaitForEvents(1, &gpuExec);
19253 float *result = (
float *) malloc(typesz);
19254 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19256 v->assign(result, result+sz);
19258 if (typesz2 != typesz or sz != sz2){
19260 result2 = (
char *) malloc(typesz2);
19261 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
19263 v2->assign(result2, result2+sz2);
19267 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
19269 v2->assign(result, result+sz2);
19272 clReleaseCommandQueue (queue);
19273 clReleaseMemObject(buffer);
19274 clReleaseMemObject(buffer2);
19275 clReleaseMemObject(buffer3);
19276 clReleaseEvent(gpuExec);
19281 size_t sz = v->size();
19282 size_t sz2 = v2->size();
19283 size_t sz3 = v3->size();
19284 size_t typesz =
sizeof(float) * sz;
19285 size_t typesz2 =
sizeof(char) * sz2;
19286 size_t typesz3 =
sizeof(float) * sz3;
19287 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
19291 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19298 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19302 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19304 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
19306 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
19308 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
19309 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
19310 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
19311 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
19313 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
19315 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
19317 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
19320 size_t size[3] = {sz, sz2, sz3};
19321 size_t work_dimension = 3;
19324 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
19325 work_dimension = 1;
19327 else if(temp_sz > 0){
19329 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 19330 For default multidimensional global work size, leave the global_work_size vector empty, \ 19331 and set multi_dimensional to true. Setting the global work size based on the values inside \ 19332 the global_work_size vector.");
19336 work_dimension = 1;
19338 else if (temp_sz == 2){
19341 work_dimension = 2;
19348 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19355 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19357 clWaitForEvents(1, &gpuExec);
19359 float *result = (
float *) malloc(typesz);
19360 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19362 v->assign(result, result+sz);
19364 if (typesz2 != typesz or sz != sz2){
19366 result2 = (
char *) malloc(typesz2);
19367 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
19369 v2->assign(result2, result2+sz2);
19373 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
19375 v2->assign(result, result+sz2);
19378 if (typesz3 != typesz or sz != sz3){
19380 result3 = (
float *) malloc(typesz3);
19381 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
19383 v3->assign(result3, result3+sz3);
// Fallback path of the size check above (presumably its else-branch; that line is not shown):
// typesz3 == typesz and sz == sz3, so the float scratch block allocated for v is reused for v3.
// Read buffer3 with its own byte count -- reading buffer2/typesz2 here returned the wrong data
// and could write past the end of result.
19387 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
19389 v3->assign(result, result+sz3);
19392 clReleaseCommandQueue (queue);
19393 clReleaseMemObject(buffer);
19394 clReleaseMemObject(buffer2);
19395 clReleaseMemObject(buffer3);
19396 clReleaseEvent(gpuExec);
19402 size_t sz = v.size();
19403 size_t sz2 = v2.size();
19404 size_t sz3 = v3.size();
19405 size_t typesz =
sizeof(float) * sz;
19406 size_t typesz2 =
sizeof(char) * sz2;
19407 size_t typesz3 =
sizeof(double) * sz3;
19408 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
19412 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19419 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19423 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// buffer2/buffer3 are filled from v2/v3 below and bound as kernel arguments 1 and 2; they are
// never read back on the host, so the kernel is presumably meant to read them. OpenCL leaves
// kernel reads from a CL_MEM_WRITE_ONLY buffer undefined, hence CL_MEM_READ_WRITE (which still
// permits any kernel that writes to them).
19425 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
19427 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
19429 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
19430 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
19431 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
19432 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
19434 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
19436 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
19438 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19441 size_t size[3] = {sz, sz2, sz3};
19442 size_t work_dimension = 3;
19445 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
19446 work_dimension = 1;
19448 else if(temp_sz > 0){
19450 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 19451 For default multidimensional global work size, leave the global_work_size vector empty, \ 19452 and set multi_dimensional to true. Setting the global work size based on the values inside \ 19453 the global_work_size vector.");
19457 work_dimension = 1;
19459 else if (temp_sz == 2){
19462 work_dimension = 2;
19469 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19476 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19478 clWaitForEvents(1, &gpuExec);
19480 float *result = (
float *) malloc(typesz);
19481 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19483 std::vector<float> res = std::vector<float>();
19484 res.assign(result, result+sz);
19486 clReleaseCommandQueue (queue);
19487 clReleaseMemObject(buffer);
19488 clReleaseMemObject(buffer2);
19489 clReleaseMemObject(buffer3);
19490 clReleaseEvent(gpuExec);
19497 size_t sz = v->size();
19498 size_t sz2 = v2.size();
19499 size_t sz3 = v3.size();
19500 size_t typesz =
sizeof(float) * sz;
19501 size_t typesz2 =
sizeof(char) * sz2;
19502 size_t typesz3 =
sizeof(double) * sz3;
19503 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
19507 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19514 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19518 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
// buffer2/buffer3 are filled from v2/v3 below and bound as kernel arguments 1 and 2; they are
// never read back on the host, so the kernel is presumably meant to read them. OpenCL leaves
// kernel reads from a CL_MEM_WRITE_ONLY buffer undefined, hence CL_MEM_READ_WRITE (which still
// permits any kernel that writes to them).
19520 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
19522 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
19524 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
19525 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
19526 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
19527 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
19529 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
19531 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
19533 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19536 size_t size[3] = {sz, sz2, sz3};
19537 size_t work_dimension = 3;
19540 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
19541 work_dimension = 1;
19543 else if(temp_sz > 0){
19545 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 19546 For default multidimensional global work size, leave the global_work_size vector empty, \ 19547 and set multi_dimensional to true. Setting the global work size based on the values inside \ 19548 the global_work_size vector.");
19552 work_dimension = 1;
19554 else if (temp_sz == 2){
19557 work_dimension = 2;
19564 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel exactly once and block until it finishes. Enqueueing a second time into
// gpuExec would overwrite the first event handle (only one clReleaseEvent follows) and would
// run the kernel twice over the same buffers.
19571 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19573 clWaitForEvents(1, &gpuExec);
19579 float *result = (
float *) malloc(typesz);
19580 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19582 v->assign(result, result+sz);
19584 clReleaseCommandQueue (queue);
19585 clReleaseMemObject(buffer);
19586 clReleaseMemObject(buffer2);
19587 clReleaseMemObject(buffer3);
19588 clReleaseEvent(gpuExec);
19593 size_t sz = v->size();
19594 size_t sz2 = v2->size();
19595 size_t sz3 = v3.size();
19596 size_t typesz =
sizeof(float) * sz;
19597 size_t typesz2 =
sizeof(char) * sz2;
19598 size_t typesz3 =
sizeof(double) * sz3;
19599 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
19603 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19610 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19614 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19616 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
// buffer3 is filled from v3 below and bound as kernel argument 2; it is never read back on the
// host, so the kernel is presumably meant to read it. OpenCL leaves kernel reads from a
// CL_MEM_WRITE_ONLY buffer undefined, hence CL_MEM_READ_WRITE.
19618 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
19620 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
19621 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
19622 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
19623 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
19625 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
19627 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
19629 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19632 size_t size[3] = {sz, sz2, sz3};
19633 size_t work_dimension = 3;
19636 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
19637 work_dimension = 1;
19639 else if(temp_sz > 0){
19641 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 19642 For default multidimensional global work size, leave the global_work_size vector empty, \ 19643 and set multi_dimensional to true. Setting the global work size based on the values inside \ 19644 the global_work_size vector.");
19648 work_dimension = 1;
19650 else if (temp_sz == 2){
19653 work_dimension = 2;
19660 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19667 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19669 clWaitForEvents(1, &gpuExec);
19671 float *result = (
float *) malloc(typesz);
19672 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19674 v->assign(result, result+sz);
19676 if (typesz2 != typesz or sz != sz2){
19678 result2 = (
char *) malloc(typesz2);
19679 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
19681 v2->assign(result2, result2+sz2);
19685 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
19687 v2->assign(result, result+sz2);
19690 clReleaseCommandQueue (queue);
19691 clReleaseMemObject(buffer);
19692 clReleaseMemObject(buffer2);
19693 clReleaseMemObject(buffer3);
19694 clReleaseEvent(gpuExec);
19699 size_t sz = v->size();
19700 size_t sz2 = v2->size();
19701 size_t sz3 = v3->size();
19702 size_t typesz =
sizeof(float) * sz;
19703 size_t typesz2 =
sizeof(char) * sz2;
19704 size_t typesz3 =
sizeof(double) * sz3;
19705 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
19709 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19716 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19720 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19722 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
19724 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
19726 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
19727 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
19728 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
19729 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
19731 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
19733 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
19735 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
19738 size_t size[3] = {sz, sz2, sz3};
19739 size_t work_dimension = 3;
19742 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
19743 work_dimension = 1;
19745 else if(temp_sz > 0){
19747 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 19748 For default multidimensional global work size, leave the global_work_size vector empty, \ 19749 and set multi_dimensional to true. Setting the global work size based on the values inside \ 19750 the global_work_size vector.");
19754 work_dimension = 1;
19756 else if (temp_sz == 2){
19759 work_dimension = 2;
19766 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19773 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19775 clWaitForEvents(1, &gpuExec);
19777 float *result = (
float *) malloc(typesz);
19778 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19780 v->assign(result, result+sz);
19782 if (typesz2 != typesz or sz != sz2){
19784 result2 = (
char *) malloc(typesz2);
19785 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
19787 v2->assign(result2, result2+sz2);
19791 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
19793 v2->assign(result, result+sz2);
19796 if (typesz3 != typesz or sz != sz3){
19798 result3 = (
double *) malloc(typesz3);
19799 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
19801 v3->assign(result3, result3+sz3);
19805 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
19807 v3->assign(result, result+sz3);
19810 clReleaseCommandQueue (queue);
19811 clReleaseMemObject(buffer);
19812 clReleaseMemObject(buffer2);
19813 clReleaseMemObject(buffer3);
19814 clReleaseEvent(gpuExec);
19820 size_t sz = v.size();
19821 size_t sz2 = v2.size();
19822 size_t sz3 = v3.size();
19823 size_t typesz =
sizeof(float) * sz;
19824 size_t typesz2 =
sizeof(int) * sz2;
19825 size_t typesz3 =
sizeof(char) * sz3;
19826 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
19830 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19837 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19841 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19843 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
19845 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
19847 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
19848 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
19849 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
19850 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
19852 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
19854 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
19856 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19859 size_t size[3] = {sz, sz2, sz3};
19860 size_t work_dimension = 3;
19863 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
19864 work_dimension = 1;
19866 else if(temp_sz > 0){
19868 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 19869 For default multidimensional global work size, leave the global_work_size vector empty, \ 19870 and set multi_dimensional to true. Setting the global work size based on the values inside \ 19871 the global_work_size vector.");
19875 work_dimension = 1;
19877 else if (temp_sz == 2){
19880 work_dimension = 2;
19887 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19894 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19896 clWaitForEvents(1, &gpuExec);
19898 float *result = (
float *) malloc(typesz);
19899 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
19901 std::vector<float> res = std::vector<float>();
19902 res.assign(result, result+sz);
19904 clReleaseCommandQueue (queue);
19905 clReleaseMemObject(buffer);
19906 clReleaseMemObject(buffer2);
19907 clReleaseMemObject(buffer3);
19908 clReleaseEvent(gpuExec);
19915 size_t sz = v->size();
19916 size_t sz2 = v2.size();
19917 size_t sz3 = v3.size();
19918 size_t typesz =
sizeof(float) * sz;
19919 size_t typesz2 =
sizeof(int) * sz2;
19920 size_t typesz3 =
sizeof(char) * sz3;
19921 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
19925 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
19932 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
19936 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
19938 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
19940 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
19942 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
19943 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
19944 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
19945 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
19947 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
19949 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
19951 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
19954 size_t size[3] = {sz, sz2, sz3};
19955 size_t work_dimension = 3;
19958 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
19959 work_dimension = 1;
19961 else if(temp_sz > 0){
19963 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 19964 For default multidimensional global work size, leave the global_work_size vector empty, \ 19965 and set multi_dimensional to true. Setting the global work size based on the values inside \ 19966 the global_work_size vector.");
19970 work_dimension = 1;
19972 else if (temp_sz == 2){
19975 work_dimension = 2;
19982 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
19989 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19991 clWaitForEvents(1, &gpuExec);
19993 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
19995 clWaitForEvents(1, &gpuExec);
19997 float *result = (
float *) malloc(typesz);
19998 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20000 v->assign(result, result+sz);
20002 clReleaseCommandQueue (queue);
20003 clReleaseMemObject(buffer);
20004 clReleaseMemObject(buffer2);
20005 clReleaseMemObject(buffer3);
20006 clReleaseEvent(gpuExec);
20011 size_t sz = v->size();
20012 size_t sz2 = v2->size();
20013 size_t sz3 = v3.size();
20014 size_t typesz =
sizeof(float) * sz;
20015 size_t typesz2 =
sizeof(int) * sz2;
20016 size_t typesz3 =
sizeof(char) * sz3;
20017 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
20021 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20028 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20032 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20034 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
20036 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
20038 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
20039 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
20040 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
20041 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
20043 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20045 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
20047 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
20050 size_t size[3] = {sz, sz2, sz3};
20051 size_t work_dimension = 3;
20054 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
20055 work_dimension = 1;
20057 else if(temp_sz > 0){
20059 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 20060 For default multidimensional global work size, leave the global_work_size vector empty, \ 20061 and set multi_dimensional to true. Setting the global work size based on the values inside \ 20062 the global_work_size vector.");
20066 work_dimension = 1;
20068 else if (temp_sz == 2){
20071 work_dimension = 2;
20078 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20085 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20087 clWaitForEvents(1, &gpuExec);
20089 float *result = (
float *) malloc(typesz);
20090 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20092 v->assign(result, result+sz);
20094 if (typesz2 != typesz or sz != sz2){
20096 result2 = (
int *) malloc(typesz2);
20097 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
20099 v2->assign(result2, result2+sz2);
20103 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
20105 v2->assign(result, result+sz2);
20108 clReleaseCommandQueue (queue);
20109 clReleaseMemObject(buffer);
20110 clReleaseMemObject(buffer2);
20111 clReleaseMemObject(buffer3);
20112 clReleaseEvent(gpuExec);
20117 size_t sz = v->size();
20118 size_t sz2 = v2->size();
20119 size_t sz3 = v3->size();
20120 size_t typesz =
sizeof(float) * sz;
20121 size_t typesz2 =
sizeof(int) * sz2;
20122 size_t typesz3 =
sizeof(char) * sz3;
20123 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
20127 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20134 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20138 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20140 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
20142 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
20144 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
20145 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
20146 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
20147 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
20149 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20151 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
20153 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
20156 size_t size[3] = {sz, sz2, sz3};
20157 size_t work_dimension = 3;
20160 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
20161 work_dimension = 1;
20163 else if(temp_sz > 0){
20165 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 20166 For default multidimensional global work size, leave the global_work_size vector empty, \ 20167 and set multi_dimensional to true. Setting the global work size based on the values inside \ 20168 the global_work_size vector.");
20172 work_dimension = 1;
20174 else if (temp_sz == 2){
20177 work_dimension = 2;
20184 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20191 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20193 clWaitForEvents(1, &gpuExec);
20195 float *result = (
float *) malloc(typesz);
20196 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20198 v->assign(result, result+sz);
20200 if (typesz2 != typesz or sz != sz2){
20202 result2 = (
int *) malloc(typesz2);
20203 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
20205 v2->assign(result2, result2+sz2);
20209 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
20211 v2->assign(result, result+sz2);
20214 if (typesz3 != typesz or sz != sz3){
20216 result3 = (
char *) malloc(typesz3);
20217 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
20219 v3->assign(result3, result3+sz3);
20223 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
20225 v3->assign(result, result+sz3);
20228 clReleaseCommandQueue (queue);
20229 clReleaseMemObject(buffer);
20230 clReleaseMemObject(buffer2);
20231 clReleaseMemObject(buffer3);
20232 clReleaseEvent(gpuExec);
20238 size_t sz = v.size();
20239 size_t sz2 = v2.size();
20240 size_t sz3 = v3.size();
20241 size_t typesz =
sizeof(float) * sz;
20242 size_t typesz2 =
sizeof(int) * sz2;
20243 size_t typesz3 =
sizeof(int) * sz3;
20244 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
20248 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20255 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20259 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20261 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
20263 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
20265 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
20266 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
20267 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
20268 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
20270 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
20272 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
20274 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
20277 size_t size[3] = {sz, sz2, sz3};
20278 size_t work_dimension = 3;
20281 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
20282 work_dimension = 1;
20284 else if(temp_sz > 0){
20286 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 20287 For default multidimensional global work size, leave the global_work_size vector empty, \ 20288 and set multi_dimensional to true. Setting the global work size based on the values inside \ 20289 the global_work_size vector.");
20293 work_dimension = 1;
20295 else if (temp_sz == 2){
20298 work_dimension = 2;
20305 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20312 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20314 clWaitForEvents(1, &gpuExec);
20316 float *result = (
float *) malloc(typesz);
20317 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20319 std::vector<float> res = std::vector<float>();
20320 res.assign(result, result+sz);
20322 clReleaseCommandQueue (queue);
20323 clReleaseMemObject(buffer);
20324 clReleaseMemObject(buffer2);
20325 clReleaseMemObject(buffer3);
20326 clReleaseEvent(gpuExec);
20333 size_t sz = v->size();
20334 size_t sz2 = v2.size();
20335 size_t sz3 = v3.size();
20336 size_t typesz =
sizeof(float) * sz;
20337 size_t typesz2 =
sizeof(int) * sz2;
20338 size_t typesz3 =
sizeof(int) * sz3;
20339 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
20343 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20350 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20354 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20356 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
20358 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
20360 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
20361 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
20362 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
20363 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
20365 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20367 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
20369 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
20372 size_t size[3] = {sz, sz2, sz3};
20373 size_t work_dimension = 3;
20376 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
20377 work_dimension = 1;
20379 else if(temp_sz > 0){
20381 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 20382 For default multidimensional global work size, leave the global_work_size vector empty, \ 20383 and set multi_dimensional to true. Setting the global work size based on the values inside \ 20384 the global_work_size vector.");
20388 work_dimension = 1;
20390 else if (temp_sz == 2){
20393 work_dimension = 2;
20400 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20407 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20409 clWaitForEvents(1, &gpuExec);
20411 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20413 clWaitForEvents(1, &gpuExec);
20415 float *result = (
float *) malloc(typesz);
20416 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20418 v->assign(result, result+sz);
20420 clReleaseCommandQueue (queue);
20421 clReleaseMemObject(buffer);
20422 clReleaseMemObject(buffer2);
20423 clReleaseMemObject(buffer3);
20424 clReleaseEvent(gpuExec);
20429 size_t sz = v->size();
20430 size_t sz2 = v2->size();
20431 size_t sz3 = v3.size();
20432 size_t typesz =
sizeof(float) * sz;
20433 size_t typesz2 =
sizeof(int) * sz2;
20434 size_t typesz3 =
sizeof(int) * sz3;
20435 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
20439 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20446 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20450 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20452 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
20454 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
20456 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
20457 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
20458 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
20459 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
20461 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20463 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
20465 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
20468 size_t size[3] = {sz, sz2, sz3};
20469 size_t work_dimension = 3;
20472 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
20473 work_dimension = 1;
20475 else if(temp_sz > 0){
20477 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 20478 For default multidimensional global work size, leave the global_work_size vector empty, \ 20479 and set multi_dimensional to true. Setting the global work size based on the values inside \ 20480 the global_work_size vector.");
20484 work_dimension = 1;
20486 else if (temp_sz == 2){
20489 work_dimension = 2;
20496 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20503 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20505 clWaitForEvents(1, &gpuExec);
20507 float *result = (
float *) malloc(typesz);
20508 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20510 v->assign(result, result+sz);
20512 if (typesz2 != typesz or sz != sz2){
20514 result2 = (
int *) malloc(typesz2);
20515 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
20517 v2->assign(result2, result2+sz2);
20521 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
20523 v2->assign(result, result+sz2);
20526 clReleaseCommandQueue (queue);
20527 clReleaseMemObject(buffer);
20528 clReleaseMemObject(buffer2);
20529 clReleaseMemObject(buffer3);
20530 clReleaseEvent(gpuExec);
20535 size_t sz = v->size();
20536 size_t sz2 = v2->size();
20537 size_t sz3 = v3->size();
20538 size_t typesz =
sizeof(float) * sz;
20539 size_t typesz2 =
sizeof(int) * sz2;
20540 size_t typesz3 =
sizeof(int) * sz3;
20541 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
20545 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20552 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20556 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20558 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
20560 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
20562 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
20563 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
20564 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
20565 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
20567 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20569 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
20571 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
20574 size_t size[3] = {sz, sz2, sz3};
20575 size_t work_dimension = 3;
20578 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
20579 work_dimension = 1;
20581 else if(temp_sz > 0){
20583 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 20584 For default multidimensional global work size, leave the global_work_size vector empty, \ 20585 and set multi_dimensional to true. Setting the global work size based on the values inside \ 20586 the global_work_size vector.");
20590 work_dimension = 1;
20592 else if (temp_sz == 2){
20595 work_dimension = 2;
20602 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20609 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20611 clWaitForEvents(1, &gpuExec);
20613 float *result = (
float *) malloc(typesz);
20614 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20616 v->assign(result, result+sz);
20618 if (typesz2 != typesz or sz != sz2){
20620 result2 = (
int *) malloc(typesz2);
20621 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
20623 v2->assign(result2, result2+sz2);
20627 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
20629 v2->assign(result, result+sz2);
20632 if (typesz3 != typesz or sz != sz3){
20634 result3 = (
int *) malloc(typesz3);
20635 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
20637 v3->assign(result3, result3+sz3);
20641 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
20643 v3->assign(result, result+sz3);
20646 clReleaseCommandQueue (queue);
20647 clReleaseMemObject(buffer);
20648 clReleaseMemObject(buffer2);
20649 clReleaseMemObject(buffer3);
20650 clReleaseEvent(gpuExec);
20656 size_t sz = v.size();
20657 size_t sz2 = v2.size();
20658 size_t sz3 = v3.size();
20659 size_t typesz =
sizeof(float) * sz;
20660 size_t typesz2 =
sizeof(int) * sz2;
20661 size_t typesz3 =
sizeof(float) * sz3;
20662 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
20666 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20673 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20677 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20679 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
20681 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
20683 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
20684 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
20685 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
20686 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
20688 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
20690 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
20692 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
20695 size_t size[3] = {sz, sz2, sz3};
20696 size_t work_dimension = 3;
20699 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
20700 work_dimension = 1;
20702 else if(temp_sz > 0){
20704 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 20705 For default multidimensional global work size, leave the global_work_size vector empty, \ 20706 and set multi_dimensional to true. Setting the global work size based on the values inside \ 20707 the global_work_size vector.");
20711 work_dimension = 1;
20713 else if (temp_sz == 2){
20716 work_dimension = 2;
20723 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20730 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20732 clWaitForEvents(1, &gpuExec);
20734 float *result = (
float *) malloc(typesz);
20735 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20737 std::vector<float> res = std::vector<float>();
20738 res.assign(result, result+sz);
20740 clReleaseCommandQueue (queue);
20741 clReleaseMemObject(buffer);
20742 clReleaseMemObject(buffer2);
20743 clReleaseMemObject(buffer3);
20744 clReleaseEvent(gpuExec);
20751 size_t sz = v->size();
20752 size_t sz2 = v2.size();
20753 size_t sz3 = v3.size();
20754 size_t typesz =
sizeof(float) * sz;
20755 size_t typesz2 =
sizeof(int) * sz2;
20756 size_t typesz3 =
sizeof(float) * sz3;
20757 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
20761 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20768 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
20772 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20774 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
20776 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
20778 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
20779 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
20780 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
20781 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
20783 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20785 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
20787 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
20790 size_t size[3] = {sz, sz2, sz3};
20791 size_t work_dimension = 3;
20794 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
20795 work_dimension = 1;
20797 else if(temp_sz > 0){
20799 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 20800 For default multidimensional global work size, leave the global_work_size vector empty, \ 20801 and set multi_dimensional to true. Setting the global work size based on the values inside \ 20802 the global_work_size vector.");
20806 work_dimension = 1;
20808 else if (temp_sz == 2){
20811 work_dimension = 2;
20818 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
20825 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20827 clWaitForEvents(1, &gpuExec);
20829 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20831 clWaitForEvents(1, &gpuExec);
20833 float *result = (
float *) malloc(typesz);
20834 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20836 v->assign(result, result+sz);
20838 clReleaseCommandQueue (queue);
20839 clReleaseMemObject(buffer);
20840 clReleaseMemObject(buffer2);
20841 clReleaseMemObject(buffer3);
20842 clReleaseEvent(gpuExec);
// Body of a kernel-launch routine; its signature lies outside the visible lines.
// Mirrors v, v2 and v3 into three OpenCL buffers bound to kernel arguments 0-2,
// launches the kernel, and copies buffer and buffer2 back into *v and *v2.
20847 size_t sz = v->size();
20848 size_t sz2 = v2->size();
20849 size_t sz3 = v3.size();
// Byte sizes of the three host containers.
20850 size_t typesz =
sizeof(float) * sz;
20851 size_t typesz2 =
sizeof(int) * sz2;
20852 size_t typesz3 =
sizeof(float) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
20853 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
20857 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20864 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers sized from the values above; each call reports its status via error.
// NOTE(review): no inspection of error is visible in these lines - confirm it is checked.
// NOTE(review): buffer3 is CL_MEM_WRITE_ONLY although it is filled from host data below - confirm the kernel never reads it.
20868 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20870 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
20872 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
20874 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
20875 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
20876 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with no explicit properties.
20877 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the device buffers.
20879 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20881 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
20883 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension, and the number of dimensions to use.
20886 size_t size[3] = {sz, sz2, sz3};
20887 size_t work_dimension = 3;
// One dimension when params is NULL, or when multi_dimensional is unset and temp_sz is 0.
20890 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
20891 work_dimension = 1;
20893 else if(temp_sz > 0){
20895 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 20896 For default multidimensional global work size, leave the global_work_size vector empty, \ 20897 and set multi_dimensional to true. Setting the global work size based on the values inside \ 20898 the global_work_size vector.");
20902 work_dimension = 1;
20904 else if (temp_sz == 2){
20907 work_dimension = 2;
20914 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
20921 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
20923 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a malloc'd staging area, then copy into *v.
// NOTE(review): no free(result) is visible in these lines - verify the allocation is released.
20925 float *result = (
float *) malloc(typesz);
20926 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
20928 v->assign(result, result+sz);
// buffer2 gets its own int staging area when its size differs from v's.
20930 if (typesz2 != typesz or sz != sz2){
20932 result2 = (
int *) malloc(typesz2);
20933 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
20935 v2->assign(result2, result2+sz2);
// Otherwise the float staging area is reused for buffer2.
// NOTE(review): if *v2 holds ints, assigning from a float* converts each value instead of copying the stored ints - confirm intent.
20939 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
20941 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
20944 clReleaseCommandQueue (queue);
20945 clReleaseMemObject(buffer);
20946 clReleaseMemObject(buffer2);
20947 clReleaseMemObject(buffer3);
20948 clReleaseEvent(gpuExec);
// Body of a kernel-launch routine; its signature lies outside the visible lines.
// Mirrors *v, *v2 and *v3 into three OpenCL buffers bound to kernel arguments 0-2,
// launches the kernel, and copies all three buffers back into their host containers.
20953 size_t sz = v->size();
20954 size_t sz2 = v2->size();
20955 size_t sz3 = v3->size();
// Byte sizes of the three host containers.
20956 size_t typesz =
sizeof(float) * sz;
20957 size_t typesz2 =
sizeof(int) * sz2;
20958 size_t typesz3 =
sizeof(float) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
20959 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
20963 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
20970 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers sized from the values above; each call reports its status via error.
// NOTE(review): no inspection of error is visible in these lines - confirm it is checked.
20974 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
20976 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
20978 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
20980 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
20981 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
20982 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with no explicit properties.
20983 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the device buffers.
20985 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
20987 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
20989 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Global work size per dimension, and the number of dimensions to use.
20992 size_t size[3] = {sz, sz2, sz3};
20993 size_t work_dimension = 3;
// One dimension when params is NULL, or when multi_dimensional is unset and temp_sz is 0.
20996 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
20997 work_dimension = 1;
20999 else if(temp_sz > 0){
21001 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 21002 For default multidimensional global work size, leave the global_work_size vector empty, \ 21003 and set multi_dimensional to true. Setting the global work size based on the values inside \ 21004 the global_work_size vector.");
21008 work_dimension = 1;
21010 else if (temp_sz == 2){
21013 work_dimension = 2;
21020 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
21027 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21029 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a malloc'd staging area, then copy into *v.
// NOTE(review): no free(result) is visible in these lines - verify the allocation is released.
21031 float *result = (
float *) malloc(typesz);
21032 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21034 v->assign(result, result+sz);
// buffer2 gets its own int staging area when its size differs from v's.
21036 if (typesz2 != typesz or sz != sz2){
21038 result2 = (
int *) malloc(typesz2);
21039 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
21041 v2->assign(result2, result2+sz2);
// Otherwise the float staging area is reused for buffer2.
// NOTE(review): if *v2 holds ints, assigning from a float* converts each value instead of copying the stored ints - confirm intent.
21045 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
21047 v2->assign(result, result+sz2);
// buffer3 gets its own float staging area when its size differs from v's.
21050 if (typesz3 != typesz or sz != sz3){
21052 result3 = (
float *) malloc(typesz3);
21053 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
21055 v3->assign(result3, result3+sz3);
// Sizes match v's here, so result is large enough to receive buffer3's typesz3 bytes.
// The source is buffer3 (not buffer2) so that *v3 receives its own data.
21059 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
21061 v3->assign(result, result+sz3);
// Release the OpenCL objects created above.
21064 clReleaseCommandQueue (queue);
21065 clReleaseMemObject(buffer);
21066 clReleaseMemObject(buffer2);
21067 clReleaseMemObject(buffer3);
21068 clReleaseEvent(gpuExec);
// Body of a kernel-launch routine; its signature lies outside the visible lines.
// Mirrors v, v2 and v3 into three OpenCL buffers bound to kernel arguments 0-2,
// launches the kernel, and copies buffer back into a new vector res.
21074 size_t sz = v.size();
21075 size_t sz2 = v2.size();
21076 size_t sz3 = v3.size();
// Byte sizes of the three host containers.
21077 size_t typesz =
sizeof(float) * sz;
21078 size_t typesz2 =
sizeof(int) * sz2;
21079 size_t typesz3 =
sizeof(double) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
21080 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
21084 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21091 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers sized from the values above; each call reports its status via error.
// NOTE(review): no inspection of error is visible in these lines - confirm it is checked.
// NOTE(review): buffer2 and buffer3 are CL_MEM_WRITE_ONLY although they are filled from host data below - confirm the kernel never reads them.
21095 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21097 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
21099 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
21101 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
21102 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
21103 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with no explicit properties.
21104 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the device buffers.
21106 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
21108 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
21110 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension, and the number of dimensions to use.
21113 size_t size[3] = {sz, sz2, sz3};
21114 size_t work_dimension = 3;
// One dimension when params is NULL, or when multi_dimensional is unset and temp_sz is 0.
21117 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
21118 work_dimension = 1;
21120 else if(temp_sz > 0){
21122 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 21123 For default multidimensional global work size, leave the global_work_size vector empty, \ 21124 and set multi_dimensional to true. Setting the global work size based on the values inside \ 21125 the global_work_size vector.");
21129 work_dimension = 1;
21131 else if (temp_sz == 2){
21134 work_dimension = 2;
21141 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
21148 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21150 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a malloc'd staging area.
// NOTE(review): no free(result) is visible in these lines - verify the allocation is released.
21152 float *result = (
float *) malloc(typesz);
21153 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the sz floats read back into a fresh vector.
21155 std::vector<float> res = std::vector<float>();
21156 res.assign(result, result+sz);
// Release the OpenCL objects created above.
21158 clReleaseCommandQueue (queue);
21159 clReleaseMemObject(buffer);
21160 clReleaseMemObject(buffer2);
21161 clReleaseMemObject(buffer3);
21162 clReleaseEvent(gpuExec);
// Body of a kernel-launch routine; its signature lies outside the visible lines.
// Mirrors *v, v2 and v3 into three OpenCL buffers bound to kernel arguments 0-2,
// launches the kernel, and copies buffer back into *v.
21169 size_t sz = v->size();
21170 size_t sz2 = v2.size();
21171 size_t sz3 = v3.size();
// Byte sizes of the three host containers.
21172 size_t typesz =
sizeof(float) * sz;
21173 size_t typesz2 =
sizeof(int) * sz2;
21174 size_t typesz3 =
sizeof(double) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
21175 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
21179 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21186 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers sized from the values above; each call reports its status via error.
// NOTE(review): no inspection of error is visible in these lines - confirm it is checked.
// NOTE(review): buffer2 and buffer3 are CL_MEM_WRITE_ONLY although they are filled from host data below - confirm the kernel never reads them.
21190 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21192 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
21194 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
21196 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
21197 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
21198 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with no explicit properties.
21199 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the device buffers.
21201 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
21203 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
21205 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension, and the number of dimensions to use.
21208 size_t size[3] = {sz, sz2, sz3};
21209 size_t work_dimension = 3;
// One dimension when params is NULL, or when multi_dimensional is unset and temp_sz is 0.
21212 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
21213 work_dimension = 1;
21215 else if(temp_sz > 0){
21217 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 21218 For default multidimensional global work size, leave the global_work_size vector empty, \ 21219 and set multi_dimensional to true. Setting the global work size based on the values inside \ 21220 the global_work_size vector.");
21224 work_dimension = 1;
21226 else if (temp_sz == 2){
21229 work_dimension = 2;
21236 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
21243 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21245 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and awaited a second time with identical arguments - confirm the repeat is intentional.
21247 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21249 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a malloc'd staging area, then copy into *v.
// NOTE(review): no free(result) is visible in these lines - verify the allocation is released.
21251 float *result = (
float *) malloc(typesz);
21252 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21254 v->assign(result, result+sz);
// Release the OpenCL objects created above.
21256 clReleaseCommandQueue (queue);
21257 clReleaseMemObject(buffer);
21258 clReleaseMemObject(buffer2);
21259 clReleaseMemObject(buffer3);
21260 clReleaseEvent(gpuExec);
// Body of a kernel-launch routine; its signature lies outside the visible lines.
// Mirrors *v, *v2 and v3 into three OpenCL buffers bound to kernel arguments 0-2,
// launches the kernel, and copies buffer and buffer2 back into *v and *v2.
21265 size_t sz = v->size();
21266 size_t sz2 = v2->size();
21267 size_t sz3 = v3.size();
// Byte sizes of the three host containers.
21268 size_t typesz =
sizeof(float) * sz;
21269 size_t typesz2 =
sizeof(int) * sz2;
21270 size_t typesz3 =
sizeof(double) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
21271 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
21275 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21282 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers sized from the values above; each call reports its status via error.
// NOTE(review): no inspection of error is visible in these lines - confirm it is checked.
// NOTE(review): buffer3 is CL_MEM_WRITE_ONLY although it is filled from host data below - confirm the kernel never reads it.
21286 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21288 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
21290 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
21292 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
21293 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
21294 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with no explicit properties.
21295 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the device buffers.
21297 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
21299 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
21301 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension, and the number of dimensions to use.
21304 size_t size[3] = {sz, sz2, sz3};
21305 size_t work_dimension = 3;
// One dimension when params is NULL, or when multi_dimensional is unset and temp_sz is 0.
21308 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
21309 work_dimension = 1;
21311 else if(temp_sz > 0){
21313 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 21314 For default multidimensional global work size, leave the global_work_size vector empty, \ 21315 and set multi_dimensional to true. Setting the global work size based on the values inside \ 21316 the global_work_size vector.");
21320 work_dimension = 1;
21322 else if (temp_sz == 2){
21325 work_dimension = 2;
21332 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
21339 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21341 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a malloc'd staging area, then copy into *v.
// NOTE(review): no free(result) is visible in these lines - verify the allocation is released.
21343 float *result = (
float *) malloc(typesz);
21344 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21346 v->assign(result, result+sz);
// buffer2 gets its own int staging area when its size differs from v's.
21348 if (typesz2 != typesz or sz != sz2){
21350 result2 = (
int *) malloc(typesz2);
21351 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
21353 v2->assign(result2, result2+sz2);
// Otherwise the float staging area is reused for buffer2.
// NOTE(review): if *v2 holds ints, assigning from a float* converts each value instead of copying the stored ints - confirm intent.
21357 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
21359 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
21362 clReleaseCommandQueue (queue);
21363 clReleaseMemObject(buffer);
21364 clReleaseMemObject(buffer2);
21365 clReleaseMemObject(buffer3);
21366 clReleaseEvent(gpuExec);
// Body of a kernel-launch routine; its signature lies outside the visible lines.
// Mirrors *v, *v2 and *v3 into three OpenCL buffers bound to kernel arguments 0-2,
// launches the kernel, and copies all three buffers back into their host containers.
21371 size_t sz = v->size();
21372 size_t sz2 = v2->size();
21373 size_t sz3 = v3->size();
// Byte sizes of the three host containers.
21374 size_t typesz =
sizeof(float) * sz;
21375 size_t typesz2 =
sizeof(int) * sz2;
21376 size_t typesz3 =
sizeof(double) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
21377 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
21381 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21388 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers sized from the values above; each call reports its status via error.
// NOTE(review): no inspection of error is visible in these lines - confirm it is checked.
21392 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21394 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
21396 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
21398 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
21399 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
21400 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with no explicit properties.
21401 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the device buffers.
21403 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
21405 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
21407 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Global work size per dimension, and the number of dimensions to use.
21410 size_t size[3] = {sz, sz2, sz3};
21411 size_t work_dimension = 3;
// One dimension when params is NULL, or when multi_dimensional is unset and temp_sz is 0.
21414 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
21415 work_dimension = 1;
21417 else if(temp_sz > 0){
21419 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 21420 For default multidimensional global work size, leave the global_work_size vector empty, \ 21421 and set multi_dimensional to true. Setting the global work size based on the values inside \ 21422 the global_work_size vector.");
21426 work_dimension = 1;
21428 else if (temp_sz == 2){
21431 work_dimension = 2;
21438 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
21445 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21447 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a malloc'd staging area, then copy into *v.
// NOTE(review): no free(result) is visible in these lines - verify the allocation is released.
21449 float *result = (
float *) malloc(typesz);
21450 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21452 v->assign(result, result+sz);
// buffer2 gets its own int staging area when its size differs from v's.
21454 if (typesz2 != typesz or sz != sz2){
21456 result2 = (
int *) malloc(typesz2);
21457 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
21459 v2->assign(result2, result2+sz2);
// Otherwise the float staging area is reused for buffer2.
// NOTE(review): if *v2 holds ints, assigning from a float* converts each value instead of copying the stored ints - confirm intent.
21463 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
21465 v2->assign(result, result+sz2);
// buffer3 gets its own double staging area when its size differs from v's.
21468 if (typesz3 != typesz or sz != sz3){
21470 result3 = (
double *) malloc(typesz3);
21471 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
21473 v3->assign(result3, result3+sz3);
// Reached only when typesz3 == typesz and sz == sz3; where sizeof(double) != sizeof(float)
// that means both containers are empty. The source is buffer3 (not buffer2) so that
// *v3 is never filled from v2's data and result is never overrun by typesz2 bytes.
21477 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
21479 v3->assign(result, result+sz3);
// Release the OpenCL objects created above.
21482 clReleaseCommandQueue (queue);
21483 clReleaseMemObject(buffer);
21484 clReleaseMemObject(buffer2);
21485 clReleaseMemObject(buffer3);
21486 clReleaseEvent(gpuExec);
// Body of a kernel-launch routine; its signature lies outside the visible lines.
// Mirrors v, v2 and v3 into three OpenCL buffers bound to kernel arguments 0-2,
// launches the kernel, and copies buffer back into a new vector res.
21492 size_t sz = v.size();
21493 size_t sz2 = v2.size();
21494 size_t sz3 = v3.size();
// Byte sizes of the three host containers.
21495 size_t typesz =
sizeof(float) * sz;
21496 size_t typesz2 =
sizeof(float) * sz2;
21497 size_t typesz3 =
sizeof(char) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
21498 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
21502 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21509 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers sized from the values above; each call reports its status via error.
// NOTE(review): no inspection of error is visible in these lines - confirm it is checked.
// NOTE(review): buffer2 and buffer3 are CL_MEM_WRITE_ONLY although they are filled from host data below - confirm the kernel never reads them.
21513 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21515 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
21517 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
21519 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
21520 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
21521 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with no explicit properties.
21522 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the device buffers.
21524 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
21526 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
21528 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension, and the number of dimensions to use.
21531 size_t size[3] = {sz, sz2, sz3};
21532 size_t work_dimension = 3;
// One dimension when params is NULL, or when multi_dimensional is unset and temp_sz is 0.
21535 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
21536 work_dimension = 1;
21538 else if(temp_sz > 0){
21540 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 21541 For default multidimensional global work size, leave the global_work_size vector empty, \ 21542 and set multi_dimensional to true. Setting the global work size based on the values inside \ 21543 the global_work_size vector.");
21547 work_dimension = 1;
21549 else if (temp_sz == 2){
21552 work_dimension = 2;
21559 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
21566 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21568 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a malloc'd staging area.
// NOTE(review): no free(result) is visible in these lines - verify the allocation is released.
21570 float *result = (
float *) malloc(typesz);
21571 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the sz floats read back into a fresh vector.
21573 std::vector<float> res = std::vector<float>();
21574 res.assign(result, result+sz);
// Release the OpenCL objects created above.
21576 clReleaseCommandQueue (queue);
21577 clReleaseMemObject(buffer);
21578 clReleaseMemObject(buffer2);
21579 clReleaseMemObject(buffer3);
21580 clReleaseEvent(gpuExec);
// Body of a kernel-launch routine; its signature lies outside the visible lines.
// Mirrors *v, v2 and v3 into three OpenCL buffers bound to kernel arguments 0-2,
// launches the kernel, and copies buffer back into *v.
21587 size_t sz = v->size();
21588 size_t sz2 = v2.size();
21589 size_t sz3 = v3.size();
// Byte sizes of the three host containers.
21590 size_t typesz =
sizeof(float) * sz;
21591 size_t typesz2 =
sizeof(float) * sz2;
21592 size_t typesz3 =
sizeof(char) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
21593 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
21597 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21604 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers sized from the values above; each call reports its status via error.
// NOTE(review): no inspection of error is visible in these lines - confirm it is checked.
// NOTE(review): buffer2 and buffer3 are CL_MEM_WRITE_ONLY although they are filled from host data below - confirm the kernel never reads them.
21608 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21610 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
21612 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
21614 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
21615 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
21616 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with no explicit properties.
21617 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the device buffers.
21619 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
21621 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
21623 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension, and the number of dimensions to use.
21626 size_t size[3] = {sz, sz2, sz3};
21627 size_t work_dimension = 3;
// One dimension when params is NULL, or when multi_dimensional is unset and temp_sz is 0.
21630 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
21631 work_dimension = 1;
21633 else if(temp_sz > 0){
21635 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 21636 For default multidimensional global work size, leave the global_work_size vector empty, \ 21637 and set multi_dimensional to true. Setting the global work size based on the values inside \ 21638 the global_work_size vector.");
21642 work_dimension = 1;
21644 else if (temp_sz == 2){
21647 work_dimension = 2;
21654 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
21661 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21663 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and awaited a second time with identical arguments - confirm the repeat is intentional.
21665 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21667 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a malloc'd staging area, then copy into *v.
// NOTE(review): no free(result) is visible in these lines - verify the allocation is released.
21669 float *result = (
float *) malloc(typesz);
21670 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21672 v->assign(result, result+sz);
// Release the OpenCL objects created above.
21674 clReleaseCommandQueue (queue);
21675 clReleaseMemObject(buffer);
21676 clReleaseMemObject(buffer2);
21677 clReleaseMemObject(buffer3);
21678 clReleaseEvent(gpuExec);
// Body of a kernel-launch routine; its signature lies outside the visible lines.
// Mirrors *v, *v2 and v3 into three OpenCL buffers bound to kernel arguments 0-2,
// launches the kernel, and copies buffer and buffer2 back into *v and *v2.
21683 size_t sz = v->size();
21684 size_t sz2 = v2->size();
21685 size_t sz3 = v3.size();
// Byte sizes of the three host containers.
21686 size_t typesz =
sizeof(float) * sz;
21687 size_t typesz2 =
sizeof(float) * sz2;
21688 size_t typesz3 =
sizeof(char) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
21689 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
21693 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21700 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers sized from the values above; each call reports its status via error.
// NOTE(review): no inspection of error is visible in these lines - confirm it is checked.
// NOTE(review): buffer3 is CL_MEM_WRITE_ONLY although it is filled from host data below - confirm the kernel never reads it.
21704 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21706 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
21708 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
21710 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
21711 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
21712 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with no explicit properties.
21713 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the device buffers.
21715 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
21717 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
21719 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension, and the number of dimensions to use.
21722 size_t size[3] = {sz, sz2, sz3};
21723 size_t work_dimension = 3;
// One dimension when params is NULL, or when multi_dimensional is unset and temp_sz is 0.
21726 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
21727 work_dimension = 1;
21729 else if(temp_sz > 0){
21731 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 21732 For default multidimensional global work size, leave the global_work_size vector empty, \ 21733 and set multi_dimensional to true. Setting the global work size based on the values inside \ 21734 the global_work_size vector.");
21738 work_dimension = 1;
21740 else if (temp_sz == 2){
21743 work_dimension = 2;
21750 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
21757 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21759 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a malloc'd staging area, then copy into *v.
// NOTE(review): no free(result) is visible in these lines - verify the allocation is released.
21761 float *result = (
float *) malloc(typesz);
21762 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21764 v->assign(result, result+sz);
// buffer2 gets its own float staging area when its size differs from v's.
21766 if (typesz2 != typesz or sz != sz2){
21768 result2 = (
float *) malloc(typesz2);
21769 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
21771 v2->assign(result2, result2+sz2);
// Otherwise the staging area already allocated for v is reused for buffer2.
21775 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
21777 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
21780 clReleaseCommandQueue (queue);
21781 clReleaseMemObject(buffer);
21782 clReleaseMemObject(buffer2);
21783 clReleaseMemObject(buffer3);
21784 clReleaseEvent(gpuExec);
// Body of a kernel-launch routine; its signature lies outside the visible lines.
// Mirrors *v, *v2 and *v3 into three OpenCL buffers bound to kernel arguments 0-2,
// launches the kernel, and copies all three buffers back into their host containers.
21789 size_t sz = v->size();
21790 size_t sz2 = v2->size();
21791 size_t sz3 = v3->size();
// Byte sizes of the three host containers.
21792 size_t typesz =
sizeof(float) * sz;
21793 size_t typesz2 =
sizeof(float) * sz2;
21794 size_t typesz3 =
sizeof(char) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
21795 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
21799 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21806 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers sized from the values above; each call reports its status via error.
// NOTE(review): no inspection of error is visible in these lines - confirm it is checked.
21810 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21812 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
21814 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
21816 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
21817 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
21818 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with no explicit properties.
21819 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the device buffers.
21821 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
21823 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
21825 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Global work size per dimension, and the number of dimensions to use.
21828 size_t size[3] = {sz, sz2, sz3};
21829 size_t work_dimension = 3;
// One dimension when params is NULL, or when multi_dimensional is unset and temp_sz is 0.
21832 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
21833 work_dimension = 1;
21835 else if(temp_sz > 0){
21837 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 21838 For default multidimensional global work size, leave the global_work_size vector empty, \ 21839 and set multi_dimensional to true. Setting the global work size based on the values inside \ 21840 the global_work_size vector.");
21844 work_dimension = 1;
21846 else if (temp_sz == 2){
21849 work_dimension = 2;
21856 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
21863 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21865 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a malloc'd staging area, then copy into *v.
// NOTE(review): no free(result) is visible in these lines - verify the allocation is released.
21867 float *result = (
float *) malloc(typesz);
21868 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
21870 v->assign(result, result+sz);
// buffer2 gets its own float staging area when its size differs from v's.
21872 if (typesz2 != typesz or sz != sz2){
21874 result2 = (
float *) malloc(typesz2);
21875 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
21877 v2->assign(result2, result2+sz2);
// Otherwise the staging area already allocated for v is reused for buffer2.
21881 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
21883 v2->assign(result, result+sz2);
// buffer3 gets its own char staging area when its size differs from v's.
21886 if (typesz3 != typesz or sz != sz3){
21888 result3 = (
char *) malloc(typesz3);
21889 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
21891 v3->assign(result3, result3+sz3);
// Reached only when typesz3 == typesz and sz == sz3; where sizeof(char) != sizeof(float)
// that means both containers are empty. The source is buffer3 (not buffer2) so that
// *v3 is never filled from v2's data and result is never overrun by typesz2 bytes.
21895 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
21897 v3->assign(result, result+sz3);
// Release the OpenCL objects created above.
21900 clReleaseCommandQueue (queue);
21901 clReleaseMemObject(buffer);
21902 clReleaseMemObject(buffer2);
21903 clReleaseMemObject(buffer3);
21904 clReleaseEvent(gpuExec);
// Body of a kernel-launch routine; its signature lies outside the visible lines.
// Mirrors v, v2 and v3 into three OpenCL buffers bound to kernel arguments 0-2,
// launches the kernel, and copies buffer back into a new vector res.
21910 size_t sz = v.size();
21911 size_t sz2 = v2.size();
21912 size_t sz3 = v3.size();
// Byte sizes of the three host containers.
21913 size_t typesz =
sizeof(float) * sz;
21914 size_t typesz2 =
sizeof(float) * sz2;
21915 size_t typesz3 =
sizeof(int) * sz3;
// Count of entries in params->buffers_size, or 0 when params is NULL.
21916 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
21920 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
21927 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// Device buffers sized from the values above; each call reports its status via error.
// NOTE(review): no inspection of error is visible in these lines - confirm it is checked.
// NOTE(review): buffer2 and buffer3 are CL_MEM_WRITE_ONLY although they are filled from host data below - confirm the kernel never reads them.
21931 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
21933 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
21935 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
21937 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
21938 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
21939 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on deviceIds[0] with no explicit properties.
21940 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data into the device buffers.
21942 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
21944 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
21946 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension, and the number of dimensions to use.
21949 size_t size[3] = {sz, sz2, sz3};
21950 size_t work_dimension = 3;
// One dimension when params is NULL, or when multi_dimensional is unset and temp_sz is 0.
21953 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
21954 work_dimension = 1;
21956 else if(temp_sz > 0){
21958 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 21959 For default multidimensional global work size, leave the global_work_size vector empty, \ 21960 and set multi_dimensional to true. Setting the global work size based on the values inside \ 21961 the global_work_size vector.");
21965 work_dimension = 1;
21967 else if (temp_sz == 2){
21970 work_dimension = 2;
21977 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch the kernel and block until its completion event fires.
21984 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
21986 clWaitForEvents(1, &gpuExec);
// Blocking read of buffer into a malloc'd staging area.
// NOTE(review): no free(result) is visible in these lines - verify the allocation is released.
21988 float *result = (
float *) malloc(typesz);
21989 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the sz floats read back into a fresh vector.
21991 std::vector<float> res = std::vector<float>();
21992 res.assign(result, result+sz);
// Release the OpenCL objects created above.
21994 clReleaseCommandQueue (queue);
21995 clReleaseMemObject(buffer);
21996 clReleaseMemObject(buffer2);
21997 clReleaseMemObject(buffer3);
21998 clReleaseEvent(gpuExec);
22005 size_t sz = v->size();
22006 size_t sz2 = v2.size();
22007 size_t sz3 = v3.size();
22008 size_t typesz =
sizeof(float) * sz;
22009 size_t typesz2 =
sizeof(float) * sz2;
22010 size_t typesz3 =
sizeof(int) * sz3;
22011 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
22015 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22022 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22026 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22028 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
22030 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22032 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
22033 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
22034 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
22035 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
22037 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22039 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
22041 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22044 size_t size[3] = {sz, sz2, sz3};
22045 size_t work_dimension = 3;
22048 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
22049 work_dimension = 1;
22051 else if(temp_sz > 0){
22053 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 22054 For default multidimensional global work size, leave the global_work_size vector empty, \ 22055 and set multi_dimensional to true. Setting the global work size based on the values inside \ 22056 the global_work_size vector.");
22060 work_dimension = 1;
22062 else if (temp_sz == 2){
22065 work_dimension = 2;
22072 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22079 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22081 clWaitForEvents(1, &gpuExec);
22083 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22085 clWaitForEvents(1, &gpuExec);
22087 float *result = (
float *) malloc(typesz);
22088 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22090 v->assign(result, result+sz);
22092 clReleaseCommandQueue (queue);
22093 clReleaseMemObject(buffer);
22094 clReleaseMemObject(buffer2);
22095 clReleaseMemObject(buffer3);
22096 clReleaseEvent(gpuExec);
22101 size_t sz = v->size();
22102 size_t sz2 = v2->size();
22103 size_t sz3 = v3.size();
22104 size_t typesz =
sizeof(float) * sz;
22105 size_t typesz2 =
sizeof(float) * sz2;
22106 size_t typesz3 =
sizeof(int) * sz3;
22107 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
22111 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22118 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22122 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22124 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
22126 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22128 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
22129 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
22130 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
22131 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
22133 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22135 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
22137 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22140 size_t size[3] = {sz, sz2, sz3};
22141 size_t work_dimension = 3;
22144 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
22145 work_dimension = 1;
22147 else if(temp_sz > 0){
22149 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 22150 For default multidimensional global work size, leave the global_work_size vector empty, \ 22151 and set multi_dimensional to true. Setting the global work size based on the values inside \ 22152 the global_work_size vector.");
22156 work_dimension = 1;
22158 else if (temp_sz == 2){
22161 work_dimension = 2;
22168 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22175 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22177 clWaitForEvents(1, &gpuExec);
22179 float *result = (
float *) malloc(typesz);
22180 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22182 v->assign(result, result+sz);
22184 if (typesz2 != typesz or sz != sz2){
22186 result2 = (
float *) malloc(typesz2);
22187 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
22189 v2->assign(result2, result2+sz2);
22193 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
22195 v2->assign(result, result+sz2);
22198 clReleaseCommandQueue (queue);
22199 clReleaseMemObject(buffer);
22200 clReleaseMemObject(buffer2);
22201 clReleaseMemObject(buffer3);
22202 clReleaseEvent(gpuExec);
22207 size_t sz = v->size();
22208 size_t sz2 = v2->size();
22209 size_t sz3 = v3->size();
22210 size_t typesz =
sizeof(float) * sz;
22211 size_t typesz2 =
sizeof(float) * sz2;
22212 size_t typesz3 =
sizeof(int) * sz3;
22213 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
22217 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22224 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22228 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22230 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
22232 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
22234 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
22235 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
22236 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
22237 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
22239 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22241 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
22243 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
22246 size_t size[3] = {sz, sz2, sz3};
22247 size_t work_dimension = 3;
22250 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
22251 work_dimension = 1;
22253 else if(temp_sz > 0){
22255 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 22256 For default multidimensional global work size, leave the global_work_size vector empty, \ 22257 and set multi_dimensional to true. Setting the global work size based on the values inside \ 22258 the global_work_size vector.");
22262 work_dimension = 1;
22264 else if (temp_sz == 2){
22267 work_dimension = 2;
22274 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22281 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22283 clWaitForEvents(1, &gpuExec);
22285 float *result = (
float *) malloc(typesz);
22286 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22288 v->assign(result, result+sz);
22290 if (typesz2 != typesz or sz != sz2){
22292 result2 = (
float *) malloc(typesz2);
22293 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
22295 v2->assign(result2, result2+sz2);
22299 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
22301 v2->assign(result, result+sz2);
22304 if (typesz3 != typesz or sz != sz3){
22306 result3 = (
int *) malloc(typesz3);
22307 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
22309 v3->assign(result3, result3+sz3);
22313 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
22315 v3->assign(result, result+sz3);
22318 clReleaseCommandQueue (queue);
22319 clReleaseMemObject(buffer);
22320 clReleaseMemObject(buffer2);
22321 clReleaseMemObject(buffer3);
22322 clReleaseEvent(gpuExec);
22328 size_t sz = v.size();
22329 size_t sz2 = v2.size();
22330 size_t sz3 = v3.size();
22331 size_t typesz =
sizeof(float) * sz;
22332 size_t typesz2 =
sizeof(float) * sz2;
22333 size_t typesz3 =
sizeof(float) * sz3;
22334 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
22338 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22345 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22349 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22351 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
22353 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22355 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
22356 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
22357 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
22358 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
22360 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
22362 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
22364 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22367 size_t size[3] = {sz, sz2, sz3};
22368 size_t work_dimension = 3;
22371 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
22372 work_dimension = 1;
22374 else if(temp_sz > 0){
22376 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 22377 For default multidimensional global work size, leave the global_work_size vector empty, \ 22378 and set multi_dimensional to true. Setting the global work size based on the values inside \ 22379 the global_work_size vector.");
22383 work_dimension = 1;
22385 else if (temp_sz == 2){
22388 work_dimension = 2;
22395 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22402 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22404 clWaitForEvents(1, &gpuExec);
22406 float *result = (
float *) malloc(typesz);
22407 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22409 std::vector<float> res = std::vector<float>();
22410 res.assign(result, result+sz);
22412 clReleaseCommandQueue (queue);
22413 clReleaseMemObject(buffer);
22414 clReleaseMemObject(buffer2);
22415 clReleaseMemObject(buffer3);
22416 clReleaseEvent(gpuExec);
22423 size_t sz = v->size();
22424 size_t sz2 = v2.size();
22425 size_t sz3 = v3.size();
22426 size_t typesz =
sizeof(float) * sz;
22427 size_t typesz2 =
sizeof(float) * sz2;
22428 size_t typesz3 =
sizeof(float) * sz3;
22429 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
22433 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22440 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22444 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22446 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
22448 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22450 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
22451 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
22452 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
22453 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
22455 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22457 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
22459 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22462 size_t size[3] = {sz, sz2, sz3};
22463 size_t work_dimension = 3;
22466 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
22467 work_dimension = 1;
22469 else if(temp_sz > 0){
22471 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 22472 For default multidimensional global work size, leave the global_work_size vector empty, \ 22473 and set multi_dimensional to true. Setting the global work size based on the values inside \ 22474 the global_work_size vector.");
22478 work_dimension = 1;
22480 else if (temp_sz == 2){
22483 work_dimension = 2;
22490 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22497 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22499 clWaitForEvents(1, &gpuExec);
22501 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22503 clWaitForEvents(1, &gpuExec);
22505 float *result = (
float *) malloc(typesz);
22506 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22508 v->assign(result, result+sz);
22510 clReleaseCommandQueue (queue);
22511 clReleaseMemObject(buffer);
22512 clReleaseMemObject(buffer2);
22513 clReleaseMemObject(buffer3);
22514 clReleaseEvent(gpuExec);
22519 size_t sz = v->size();
22520 size_t sz2 = v2->size();
22521 size_t sz3 = v3.size();
22522 size_t typesz =
sizeof(float) * sz;
22523 size_t typesz2 =
sizeof(float) * sz2;
22524 size_t typesz3 =
sizeof(float) * sz3;
22525 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
22529 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22536 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22540 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22542 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
22544 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22546 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
22547 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
22548 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
22549 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
22551 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22553 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
22555 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22558 size_t size[3] = {sz, sz2, sz3};
22559 size_t work_dimension = 3;
22562 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
22563 work_dimension = 1;
22565 else if(temp_sz > 0){
22567 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 22568 For default multidimensional global work size, leave the global_work_size vector empty, \ 22569 and set multi_dimensional to true. Setting the global work size based on the values inside \ 22570 the global_work_size vector.");
22574 work_dimension = 1;
22576 else if (temp_sz == 2){
22579 work_dimension = 2;
22586 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22593 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22595 clWaitForEvents(1, &gpuExec);
22597 float *result = (
float *) malloc(typesz);
22598 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22600 v->assign(result, result+sz);
22602 if (typesz2 != typesz or sz != sz2){
22604 result2 = (
float *) malloc(typesz2);
22605 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
22607 v2->assign(result2, result2+sz2);
22611 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
22613 v2->assign(result, result+sz2);
22616 clReleaseCommandQueue (queue);
22617 clReleaseMemObject(buffer);
22618 clReleaseMemObject(buffer2);
22619 clReleaseMemObject(buffer3);
22620 clReleaseEvent(gpuExec);
22625 size_t sz = v->size();
22626 size_t sz2 = v2->size();
22627 size_t sz3 = v3->size();
22628 size_t typesz =
sizeof(float) * sz;
22629 size_t typesz2 =
sizeof(float) * sz2;
22630 size_t typesz3 =
sizeof(float) * sz3;
22631 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
22635 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22642 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22646 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22648 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
22650 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
22652 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
22653 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
22654 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
22655 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
22657 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22659 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
22661 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
22664 size_t size[3] = {sz, sz2, sz3};
22665 size_t work_dimension = 3;
22668 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
22669 work_dimension = 1;
22671 else if(temp_sz > 0){
22673 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 22674 For default multidimensional global work size, leave the global_work_size vector empty, \ 22675 and set multi_dimensional to true. Setting the global work size based on the values inside \ 22676 the global_work_size vector.");
22680 work_dimension = 1;
22682 else if (temp_sz == 2){
22685 work_dimension = 2;
22692 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22699 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22701 clWaitForEvents(1, &gpuExec);
22703 float *result = (
float *) malloc(typesz);
22704 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22706 v->assign(result, result+sz);
22708 if (typesz2 != typesz or sz != sz2){
22710 result2 = (
float *) malloc(typesz2);
22711 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
22713 v2->assign(result2, result2+sz2);
22717 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
22719 v2->assign(result, result+sz2);
22722 if (typesz3 != typesz or sz != sz3){
22724 result3 = (
float *) malloc(typesz3);
22725 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
22727 v3->assign(result3, result3+sz3);
22731 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
22733 v3->assign(result, result+sz3);
22736 clReleaseCommandQueue (queue);
22737 clReleaseMemObject(buffer);
22738 clReleaseMemObject(buffer2);
22739 clReleaseMemObject(buffer3);
22740 clReleaseEvent(gpuExec);
22746 size_t sz = v.size();
22747 size_t sz2 = v2.size();
22748 size_t sz3 = v3.size();
22749 size_t typesz =
sizeof(float) * sz;
22750 size_t typesz2 =
sizeof(float) * sz2;
22751 size_t typesz3 =
sizeof(double) * sz3;
22752 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
22756 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22763 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22767 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22769 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
22771 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22773 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
22774 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
22775 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
22776 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
22778 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
22780 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
22782 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22785 size_t size[3] = {sz, sz2, sz3};
22786 size_t work_dimension = 3;
22789 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
22790 work_dimension = 1;
22792 else if(temp_sz > 0){
22794 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 22795 For default multidimensional global work size, leave the global_work_size vector empty, \ 22796 and set multi_dimensional to true. Setting the global work size based on the values inside \ 22797 the global_work_size vector.");
22801 work_dimension = 1;
22803 else if (temp_sz == 2){
22806 work_dimension = 2;
22813 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22820 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22822 clWaitForEvents(1, &gpuExec);
22824 float *result = (
float *) malloc(typesz);
22825 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22827 std::vector<float> res = std::vector<float>();
22828 res.assign(result, result+sz);
22830 clReleaseCommandQueue (queue);
22831 clReleaseMemObject(buffer);
22832 clReleaseMemObject(buffer2);
22833 clReleaseMemObject(buffer3);
22834 clReleaseEvent(gpuExec);
22841 size_t sz = v->size();
22842 size_t sz2 = v2.size();
22843 size_t sz3 = v3.size();
22844 size_t typesz =
sizeof(float) * sz;
22845 size_t typesz2 =
sizeof(float) * sz2;
22846 size_t typesz3 =
sizeof(double) * sz3;
22847 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
22851 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22858 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22862 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22864 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
22866 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22868 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
22869 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
22870 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
22871 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
22873 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22875 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
22877 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22880 size_t size[3] = {sz, sz2, sz3};
22881 size_t work_dimension = 3;
22884 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
22885 work_dimension = 1;
22887 else if(temp_sz > 0){
22889 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 22890 For default multidimensional global work size, leave the global_work_size vector empty, \ 22891 and set multi_dimensional to true. Setting the global work size based on the values inside \ 22892 the global_work_size vector.");
22896 work_dimension = 1;
22898 else if (temp_sz == 2){
22901 work_dimension = 2;
22908 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
22915 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22917 clWaitForEvents(1, &gpuExec);
22919 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
22921 clWaitForEvents(1, &gpuExec);
22923 float *result = (
float *) malloc(typesz);
22924 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
22926 v->assign(result, result+sz);
22928 clReleaseCommandQueue (queue);
22929 clReleaseMemObject(buffer);
22930 clReleaseMemObject(buffer2);
22931 clReleaseMemObject(buffer3);
22932 clReleaseEvent(gpuExec);
22937 size_t sz = v->size();
22938 size_t sz2 = v2->size();
22939 size_t sz3 = v3.size();
22940 size_t typesz =
sizeof(float) * sz;
22941 size_t typesz2 =
sizeof(float) * sz2;
22942 size_t typesz3 =
sizeof(double) * sz3;
22943 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
22947 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
22954 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
22958 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
22960 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
22962 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
22964 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
22965 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
22966 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
22967 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
22969 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
22971 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
22973 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
22976 size_t size[3] = {sz, sz2, sz3};
22977 size_t work_dimension = 3;
22980 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
22981 work_dimension = 1;
22983 else if(temp_sz > 0){
22985 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 22986 For default multidimensional global work size, leave the global_work_size vector empty, \ 22987 and set multi_dimensional to true. Setting the global work size based on the values inside \ 22988 the global_work_size vector.");
22992 work_dimension = 1;
22994 else if (temp_sz == 2){
22997 work_dimension = 2;
23004 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23011 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23013 clWaitForEvents(1, &gpuExec);
23015 float *result = (
float *) malloc(typesz);
23016 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23018 v->assign(result, result+sz);
23020 if (typesz2 != typesz or sz != sz2){
23022 result2 = (
float *) malloc(typesz2);
23023 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
23025 v2->assign(result2, result2+sz2);
23029 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23031 v2->assign(result, result+sz2);
23034 clReleaseCommandQueue (queue);
23035 clReleaseMemObject(buffer);
23036 clReleaseMemObject(buffer2);
23037 clReleaseMemObject(buffer3);
23038 clReleaseEvent(gpuExec);
23043 size_t sz = v->size();
23044 size_t sz2 = v2->size();
23045 size_t sz3 = v3->size();
23046 size_t typesz =
sizeof(float) * sz;
23047 size_t typesz2 =
sizeof(float) * sz2;
23048 size_t typesz3 =
sizeof(double) * sz3;
23049 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
23053 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23060 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23064 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23066 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
23068 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
23070 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
23071 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
23072 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
23073 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
23075 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
23077 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
23079 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
23082 size_t size[3] = {sz, sz2, sz3};
23083 size_t work_dimension = 3;
23086 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
23087 work_dimension = 1;
23089 else if(temp_sz > 0){
23091 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 23092 For default multidimensional global work size, leave the global_work_size vector empty, \ 23093 and set multi_dimensional to true. Setting the global work size based on the values inside \ 23094 the global_work_size vector.");
23098 work_dimension = 1;
23100 else if (temp_sz == 2){
23103 work_dimension = 2;
23110 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23117 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23119 clWaitForEvents(1, &gpuExec);
23121 float *result = (
float *) malloc(typesz);
23122 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23124 v->assign(result, result+sz);
23126 if (typesz2 != typesz or sz != sz2){
23128 result2 = (
float *) malloc(typesz2);
23129 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
23131 v2->assign(result2, result2+sz2);
23135 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23137 v2->assign(result, result+sz2);
23140 if (typesz3 != typesz or sz != sz3){
23142 result3 = (
double *) malloc(typesz3);
23143 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
23145 v3->assign(result3, result3+sz3);
23149 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23151 v3->assign(result, result+sz3);
23154 clReleaseCommandQueue (queue);
23155 clReleaseMemObject(buffer);
23156 clReleaseMemObject(buffer2);
23157 clReleaseMemObject(buffer3);
23158 clReleaseEvent(gpuExec);
23164 size_t sz = v.size();
23165 size_t sz2 = v2.size();
23166 size_t sz3 = v3.size();
23167 size_t typesz =
sizeof(float) * sz;
23168 size_t typesz2 =
sizeof(double) * sz2;
23169 size_t typesz3 =
sizeof(char) * sz3;
23170 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
23174 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23181 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23185 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23187 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
23189 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
23191 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
23192 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
23193 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
23194 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
23196 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
23198 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
23200 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
23203 size_t size[3] = {sz, sz2, sz3};
23204 size_t work_dimension = 3;
23207 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
23208 work_dimension = 1;
23210 else if(temp_sz > 0){
23212 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 23213 For default multidimensional global work size, leave the global_work_size vector empty, \ 23214 and set multi_dimensional to true. Setting the global work size based on the values inside \ 23215 the global_work_size vector.");
23219 work_dimension = 1;
23221 else if (temp_sz == 2){
23224 work_dimension = 2;
23231 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23238 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23240 clWaitForEvents(1, &gpuExec);
23242 float *result = (
float *) malloc(typesz);
23243 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23245 std::vector<float> res = std::vector<float>();
23246 res.assign(result, result+sz);
23248 clReleaseCommandQueue (queue);
23249 clReleaseMemObject(buffer);
23250 clReleaseMemObject(buffer2);
23251 clReleaseMemObject(buffer3);
23252 clReleaseEvent(gpuExec);
23259 size_t sz = v->size();
23260 size_t sz2 = v2.size();
23261 size_t sz3 = v3.size();
23262 size_t typesz =
sizeof(float) * sz;
23263 size_t typesz2 =
sizeof(double) * sz2;
23264 size_t typesz3 =
sizeof(char) * sz3;
23265 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
23269 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23276 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23280 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23282 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
23284 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
23286 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
23287 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
23288 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
23289 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
23291 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
23293 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
23295 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
23298 size_t size[3] = {sz, sz2, sz3};
23299 size_t work_dimension = 3;
23302 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
23303 work_dimension = 1;
23305 else if(temp_sz > 0){
23307 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 23308 For default multidimensional global work size, leave the global_work_size vector empty, \ 23309 and set multi_dimensional to true. Setting the global work size based on the values inside \ 23310 the global_work_size vector.");
23314 work_dimension = 1;
23316 else if (temp_sz == 2){
23319 work_dimension = 2;
23326 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23333 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23335 clWaitForEvents(1, &gpuExec);
23337 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23339 clWaitForEvents(1, &gpuExec);
23341 float *result = (
float *) malloc(typesz);
23342 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23344 v->assign(result, result+sz);
23346 clReleaseCommandQueue (queue);
23347 clReleaseMemObject(buffer);
23348 clReleaseMemObject(buffer2);
23349 clReleaseMemObject(buffer3);
23350 clReleaseEvent(gpuExec);
23355 size_t sz = v->size();
23356 size_t sz2 = v2->size();
23357 size_t sz3 = v3.size();
23358 size_t typesz =
sizeof(float) * sz;
23359 size_t typesz2 =
sizeof(double) * sz2;
23360 size_t typesz3 =
sizeof(char) * sz3;
23361 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
23365 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23372 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23376 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23378 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
23380 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
23382 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
23383 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
23384 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
23385 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
23387 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
23389 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
23391 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
23394 size_t size[3] = {sz, sz2, sz3};
23395 size_t work_dimension = 3;
23398 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
23399 work_dimension = 1;
23401 else if(temp_sz > 0){
23403 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 23404 For default multidimensional global work size, leave the global_work_size vector empty, \ 23405 and set multi_dimensional to true. Setting the global work size based on the values inside \ 23406 the global_work_size vector.");
23410 work_dimension = 1;
23412 else if (temp_sz == 2){
23415 work_dimension = 2;
23422 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23429 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23431 clWaitForEvents(1, &gpuExec);
23433 float *result = (
float *) malloc(typesz);
23434 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23436 v->assign(result, result+sz);
23438 if (typesz2 != typesz or sz != sz2){
23440 result2 = (
double *) malloc(typesz2);
23441 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
23443 v2->assign(result2, result2+sz2);
23447 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23449 v2->assign(result, result+sz2);
23452 clReleaseCommandQueue (queue);
23453 clReleaseMemObject(buffer);
23454 clReleaseMemObject(buffer2);
23455 clReleaseMemObject(buffer3);
23456 clReleaseEvent(gpuExec);
23461 size_t sz = v->size();
23462 size_t sz2 = v2->size();
23463 size_t sz3 = v3->size();
23464 size_t typesz =
sizeof(float) * sz;
23465 size_t typesz2 =
sizeof(double) * sz2;
23466 size_t typesz3 =
sizeof(char) * sz3;
23467 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
23471 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23478 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23482 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23484 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
23486 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
23488 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
23489 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
23490 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
23491 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
23493 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
23495 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
23497 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
23500 size_t size[3] = {sz, sz2, sz3};
23501 size_t work_dimension = 3;
23504 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
23505 work_dimension = 1;
23507 else if(temp_sz > 0){
23509 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 23510 For default multidimensional global work size, leave the global_work_size vector empty, \ 23511 and set multi_dimensional to true. Setting the global work size based on the values inside \ 23512 the global_work_size vector.");
23516 work_dimension = 1;
23518 else if (temp_sz == 2){
23521 work_dimension = 2;
23528 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23535 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23537 clWaitForEvents(1, &gpuExec);
23539 float *result = (
float *) malloc(typesz);
23540 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23542 v->assign(result, result+sz);
23544 if (typesz2 != typesz or sz != sz2){
23546 result2 = (
double *) malloc(typesz2);
23547 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
23549 v2->assign(result2, result2+sz2);
23553 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23555 v2->assign(result, result+sz2);
23558 if (typesz3 != typesz or sz != sz3){
23560 result3 = (
char *) malloc(typesz3);
23561 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
23563 v3->assign(result3, result3+sz3);
23567 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23569 v3->assign(result, result+sz3);
23572 clReleaseCommandQueue (queue);
23573 clReleaseMemObject(buffer);
23574 clReleaseMemObject(buffer2);
23575 clReleaseMemObject(buffer3);
23576 clReleaseEvent(gpuExec);
23582 size_t sz = v.size();
23583 size_t sz2 = v2.size();
23584 size_t sz3 = v3.size();
23585 size_t typesz =
sizeof(float) * sz;
23586 size_t typesz2 =
sizeof(double) * sz2;
23587 size_t typesz3 =
sizeof(int) * sz3;
23588 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
23592 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23599 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23603 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23605 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
23607 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
23609 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
23610 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
23611 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
23612 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
23614 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
23616 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
23618 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
23621 size_t size[3] = {sz, sz2, sz3};
23622 size_t work_dimension = 3;
23625 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
23626 work_dimension = 1;
23628 else if(temp_sz > 0){
23630 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 23631 For default multidimensional global work size, leave the global_work_size vector empty, \ 23632 and set multi_dimensional to true. Setting the global work size based on the values inside \ 23633 the global_work_size vector.");
23637 work_dimension = 1;
23639 else if (temp_sz == 2){
23642 work_dimension = 2;
23649 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23656 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23658 clWaitForEvents(1, &gpuExec);
23660 float *result = (
float *) malloc(typesz);
23661 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23663 std::vector<float> res = std::vector<float>();
23664 res.assign(result, result+sz);
23666 clReleaseCommandQueue (queue);
23667 clReleaseMemObject(buffer);
23668 clReleaseMemObject(buffer2);
23669 clReleaseMemObject(buffer3);
23670 clReleaseEvent(gpuExec);
23677 size_t sz = v->size();
23678 size_t sz2 = v2.size();
23679 size_t sz3 = v3.size();
23680 size_t typesz =
sizeof(float) * sz;
23681 size_t typesz2 =
sizeof(double) * sz2;
23682 size_t typesz3 =
sizeof(int) * sz3;
23683 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
23687 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23694 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23698 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23700 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
23702 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
23704 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
23705 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
23706 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
23707 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
23709 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
23711 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
23713 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
23716 size_t size[3] = {sz, sz2, sz3};
23717 size_t work_dimension = 3;
23720 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
23721 work_dimension = 1;
23723 else if(temp_sz > 0){
23725 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 23726 For default multidimensional global work size, leave the global_work_size vector empty, \ 23727 and set multi_dimensional to true. Setting the global work size based on the values inside \ 23728 the global_work_size vector.");
23732 work_dimension = 1;
23734 else if (temp_sz == 2){
23737 work_dimension = 2;
23744 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23751 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23753 clWaitForEvents(1, &gpuExec);
23755 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23757 clWaitForEvents(1, &gpuExec);
23759 float *result = (
float *) malloc(typesz);
23760 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23762 v->assign(result, result+sz);
23764 clReleaseCommandQueue (queue);
23765 clReleaseMemObject(buffer);
23766 clReleaseMemObject(buffer2);
23767 clReleaseMemObject(buffer3);
23768 clReleaseEvent(gpuExec);
23773 size_t sz = v->size();
23774 size_t sz2 = v2->size();
23775 size_t sz3 = v3.size();
23776 size_t typesz =
sizeof(float) * sz;
23777 size_t typesz2 =
sizeof(double) * sz2;
23778 size_t typesz3 =
sizeof(int) * sz3;
23779 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
23783 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23790 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23794 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23796 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
23798 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
23800 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
23801 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
23802 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
23803 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
23805 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
23807 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
23809 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
23812 size_t size[3] = {sz, sz2, sz3};
23813 size_t work_dimension = 3;
23816 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
23817 work_dimension = 1;
23819 else if(temp_sz > 0){
23821 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 23822 For default multidimensional global work size, leave the global_work_size vector empty, \ 23823 and set multi_dimensional to true. Setting the global work size based on the values inside \ 23824 the global_work_size vector.");
23828 work_dimension = 1;
23830 else if (temp_sz == 2){
23833 work_dimension = 2;
23840 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23847 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23849 clWaitForEvents(1, &gpuExec);
23851 float *result = (
float *) malloc(typesz);
23852 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23854 v->assign(result, result+sz);
23856 if (typesz2 != typesz or sz != sz2){
23858 result2 = (
double *) malloc(typesz2);
23859 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
23861 v2->assign(result2, result2+sz2);
23865 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23867 v2->assign(result, result+sz2);
23870 clReleaseCommandQueue (queue);
23871 clReleaseMemObject(buffer);
23872 clReleaseMemObject(buffer2);
23873 clReleaseMemObject(buffer3);
23874 clReleaseEvent(gpuExec);
23879 size_t sz = v->size();
23880 size_t sz2 = v2->size();
23881 size_t sz3 = v3->size();
23882 size_t typesz =
sizeof(float) * sz;
23883 size_t typesz2 =
sizeof(double) * sz2;
23884 size_t typesz3 =
sizeof(int) * sz3;
23885 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
23889 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
23896 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
23900 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
23902 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
23904 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
23906 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
23907 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
23908 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
23909 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
23911 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
23913 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
23915 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
23918 size_t size[3] = {sz, sz2, sz3};
23919 size_t work_dimension = 3;
23922 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
23923 work_dimension = 1;
23925 else if(temp_sz > 0){
23927 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 23928 For default multidimensional global work size, leave the global_work_size vector empty, \ 23929 and set multi_dimensional to true. Setting the global work size based on the values inside \ 23930 the global_work_size vector.");
23934 work_dimension = 1;
23936 else if (temp_sz == 2){
23939 work_dimension = 2;
23946 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
23953 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
23955 clWaitForEvents(1, &gpuExec);
23957 float *result = (
float *) malloc(typesz);
23958 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
23960 v->assign(result, result+sz);
23962 if (typesz2 != typesz or sz != sz2){
23964 result2 = (
double *) malloc(typesz2);
23965 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
23967 v2->assign(result2, result2+sz2);
23971 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23973 v2->assign(result, result+sz2);
23976 if (typesz3 != typesz or sz != sz3){
23978 result3 = (
int *) malloc(typesz3);
23979 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
23981 v3->assign(result3, result3+sz3);
23985 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
23987 v3->assign(result, result+sz3);
23990 clReleaseCommandQueue (queue);
23991 clReleaseMemObject(buffer);
23992 clReleaseMemObject(buffer2);
23993 clReleaseMemObject(buffer3);
23994 clReleaseEvent(gpuExec);
24000 size_t sz = v.size();
24001 size_t sz2 = v2.size();
24002 size_t sz3 = v3.size();
24003 size_t typesz =
sizeof(float) * sz;
24004 size_t typesz2 =
sizeof(double) * sz2;
24005 size_t typesz3 =
sizeof(float) * sz3;
24006 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
24010 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24017 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24021 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24023 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
24025 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24027 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
24028 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
24029 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
24030 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
24032 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
24034 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
24036 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24039 size_t size[3] = {sz, sz2, sz3};
24040 size_t work_dimension = 3;
24043 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
24044 work_dimension = 1;
24046 else if(temp_sz > 0){
24048 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 24049 For default multidimensional global work size, leave the global_work_size vector empty, \ 24050 and set multi_dimensional to true. Setting the global work size based on the values inside \ 24051 the global_work_size vector.");
24055 work_dimension = 1;
24057 else if (temp_sz == 2){
24060 work_dimension = 2;
24067 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24074 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24076 clWaitForEvents(1, &gpuExec);
24078 float *result = (
float *) malloc(typesz);
24079 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24081 std::vector<float> res = std::vector<float>();
24082 res.assign(result, result+sz);
24084 clReleaseCommandQueue (queue);
24085 clReleaseMemObject(buffer);
24086 clReleaseMemObject(buffer2);
24087 clReleaseMemObject(buffer3);
24088 clReleaseEvent(gpuExec);
24095 size_t sz = v->size();
24096 size_t sz2 = v2.size();
24097 size_t sz3 = v3.size();
24098 size_t typesz =
sizeof(float) * sz;
24099 size_t typesz2 =
sizeof(double) * sz2;
24100 size_t typesz3 =
sizeof(float) * sz3;
24101 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
24105 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24112 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24116 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24118 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
24120 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24122 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
24123 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
24124 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
24125 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
24127 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
24129 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
24131 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24134 size_t size[3] = {sz, sz2, sz3};
24135 size_t work_dimension = 3;
24138 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
24139 work_dimension = 1;
24141 else if(temp_sz > 0){
24143 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 24144 For default multidimensional global work size, leave the global_work_size vector empty, \ 24145 and set multi_dimensional to true. Setting the global work size based on the values inside \ 24146 the global_work_size vector.");
24150 work_dimension = 1;
24152 else if (temp_sz == 2){
24155 work_dimension = 2;
24162 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24169 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24171 clWaitForEvents(1, &gpuExec);
24173 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24175 clWaitForEvents(1, &gpuExec);
24177 float *result = (
float *) malloc(typesz);
24178 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24180 v->assign(result, result+sz);
24182 clReleaseCommandQueue (queue);
24183 clReleaseMemObject(buffer);
24184 clReleaseMemObject(buffer2);
24185 clReleaseMemObject(buffer3);
24186 clReleaseEvent(gpuExec);
24191 size_t sz = v->size();
24192 size_t sz2 = v2->size();
24193 size_t sz3 = v3.size();
24194 size_t typesz =
sizeof(float) * sz;
24195 size_t typesz2 =
sizeof(double) * sz2;
24196 size_t typesz3 =
sizeof(float) * sz3;
24197 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
24201 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24208 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24212 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24214 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
24216 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24218 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
24219 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
24220 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
24221 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
24223 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
24225 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
24227 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24230 size_t size[3] = {sz, sz2, sz3};
24231 size_t work_dimension = 3;
24234 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
24235 work_dimension = 1;
24237 else if(temp_sz > 0){
24239 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 24240 For default multidimensional global work size, leave the global_work_size vector empty, \ 24241 and set multi_dimensional to true. Setting the global work size based on the values inside \ 24242 the global_work_size vector.");
24246 work_dimension = 1;
24248 else if (temp_sz == 2){
24251 work_dimension = 2;
24258 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24265 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24267 clWaitForEvents(1, &gpuExec);
24269 float *result = (
float *) malloc(typesz);
24270 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24272 v->assign(result, result+sz);
24274 if (typesz2 != typesz or sz != sz2){
24276 result2 = (
double *) malloc(typesz2);
24277 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
24279 v2->assign(result2, result2+sz2);
24283 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
24285 v2->assign(result, result+sz2);
24288 clReleaseCommandQueue (queue);
24289 clReleaseMemObject(buffer);
24290 clReleaseMemObject(buffer2);
24291 clReleaseMemObject(buffer3);
24292 clReleaseEvent(gpuExec);
24297 size_t sz = v->size();
24298 size_t sz2 = v2->size();
24299 size_t sz3 = v3->size();
24300 size_t typesz =
sizeof(float) * sz;
24301 size_t typesz2 =
sizeof(double) * sz2;
24302 size_t typesz3 =
sizeof(float) * sz3;
24303 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
24307 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24314 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24318 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24320 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
24322 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
24324 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
24325 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
24326 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
24327 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
24329 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
24331 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
24333 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
24336 size_t size[3] = {sz, sz2, sz3};
24337 size_t work_dimension = 3;
24340 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
24341 work_dimension = 1;
24343 else if(temp_sz > 0){
24345 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 24346 For default multidimensional global work size, leave the global_work_size vector empty, \ 24347 and set multi_dimensional to true. Setting the global work size based on the values inside \ 24348 the global_work_size vector.");
24352 work_dimension = 1;
24354 else if (temp_sz == 2){
24357 work_dimension = 2;
24364 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24371 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24373 clWaitForEvents(1, &gpuExec);
24375 float *result = (
float *) malloc(typesz);
24376 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24378 v->assign(result, result+sz);
24380 if (typesz2 != typesz or sz != sz2){
24382 result2 = (
double *) malloc(typesz2);
24383 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
24385 v2->assign(result2, result2+sz2);
24389 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
24391 v2->assign(result, result+sz2);
24394 if (typesz3 != typesz or sz != sz3){
24396 result3 = (
float *) malloc(typesz3);
24397 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
24399 v3->assign(result3, result3+sz3);
24403 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
24405 v3->assign(result, result+sz3);
24408 clReleaseCommandQueue (queue);
24409 clReleaseMemObject(buffer);
24410 clReleaseMemObject(buffer2);
24411 clReleaseMemObject(buffer3);
24412 clReleaseEvent(gpuExec);
24418 size_t sz = v.size();
24419 size_t sz2 = v2.size();
24420 size_t sz3 = v3.size();
24421 size_t typesz =
sizeof(float) * sz;
24422 size_t typesz2 =
sizeof(double) * sz2;
24423 size_t typesz3 =
sizeof(double) * sz3;
24424 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
24428 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24435 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24439 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24441 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
24443 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24445 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
24446 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
24447 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
24448 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
24450 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
24452 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
24454 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24457 size_t size[3] = {sz, sz2, sz3};
24458 size_t work_dimension = 3;
24461 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
24462 work_dimension = 1;
24464 else if(temp_sz > 0){
24466 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 24467 For default multidimensional global work size, leave the global_work_size vector empty, \ 24468 and set multi_dimensional to true. Setting the global work size based on the values inside \ 24469 the global_work_size vector.");
24473 work_dimension = 1;
24475 else if (temp_sz == 2){
24478 work_dimension = 2;
24485 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24492 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24494 clWaitForEvents(1, &gpuExec);
24496 float *result = (
float *) malloc(typesz);
24497 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24499 std::vector<float> res = std::vector<float>();
24500 res.assign(result, result+sz);
24502 clReleaseCommandQueue (queue);
24503 clReleaseMemObject(buffer);
24504 clReleaseMemObject(buffer2);
24505 clReleaseMemObject(buffer3);
24506 clReleaseEvent(gpuExec);
24513 size_t sz = v->size();
24514 size_t sz2 = v2.size();
24515 size_t sz3 = v3.size();
24516 size_t typesz =
sizeof(float) * sz;
24517 size_t typesz2 =
sizeof(double) * sz2;
24518 size_t typesz3 =
sizeof(double) * sz3;
24519 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
24523 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24530 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24534 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24536 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
24538 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24540 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
24541 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
24542 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
24543 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
24545 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
24547 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
24549 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24552 size_t size[3] = {sz, sz2, sz3};
24553 size_t work_dimension = 3;
24556 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
24557 work_dimension = 1;
24559 else if(temp_sz > 0){
24561 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 24562 For default multidimensional global work size, leave the global_work_size vector empty, \ 24563 and set multi_dimensional to true. Setting the global work size based on the values inside \ 24564 the global_work_size vector.");
24568 work_dimension = 1;
24570 else if (temp_sz == 2){
24573 work_dimension = 2;
24580 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24587 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24589 clWaitForEvents(1, &gpuExec);
24591 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24593 clWaitForEvents(1, &gpuExec);
24595 float *result = (
float *) malloc(typesz);
24596 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24598 v->assign(result, result+sz);
24600 clReleaseCommandQueue (queue);
24601 clReleaseMemObject(buffer);
24602 clReleaseMemObject(buffer2);
24603 clReleaseMemObject(buffer3);
24604 clReleaseEvent(gpuExec);
24609 size_t sz = v->size();
24610 size_t sz2 = v2->size();
24611 size_t sz3 = v3.size();
24612 size_t typesz =
sizeof(float) * sz;
24613 size_t typesz2 =
sizeof(double) * sz2;
24614 size_t typesz3 =
sizeof(double) * sz3;
24615 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
24619 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24626 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24630 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24632 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
24634 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24636 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
24637 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
24638 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
24639 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
24641 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
24643 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
24645 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24648 size_t size[3] = {sz, sz2, sz3};
24649 size_t work_dimension = 3;
24652 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
24653 work_dimension = 1;
24655 else if(temp_sz > 0){
24657 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 24658 For default multidimensional global work size, leave the global_work_size vector empty, \ 24659 and set multi_dimensional to true. Setting the global work size based on the values inside \ 24660 the global_work_size vector.");
24664 work_dimension = 1;
24666 else if (temp_sz == 2){
24669 work_dimension = 2;
24676 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24683 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24685 clWaitForEvents(1, &gpuExec);
24687 float *result = (
float *) malloc(typesz);
24688 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24690 v->assign(result, result+sz);
24692 if (typesz2 != typesz or sz != sz2){
24694 result2 = (
double *) malloc(typesz2);
24695 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
24697 v2->assign(result2, result2+sz2);
24701 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
24703 v2->assign(result, result+sz2);
24706 clReleaseCommandQueue (queue);
24707 clReleaseMemObject(buffer);
24708 clReleaseMemObject(buffer2);
24709 clReleaseMemObject(buffer3);
24710 clReleaseEvent(gpuExec);
24715 size_t sz = v->size();
24716 size_t sz2 = v2->size();
24717 size_t sz3 = v3->size();
24718 size_t typesz =
sizeof(float) * sz;
24719 size_t typesz2 =
sizeof(double) * sz2;
24720 size_t typesz3 =
sizeof(double) * sz3;
24721 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
24725 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24732 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24736 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24738 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
24740 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
24742 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
24743 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
24744 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
24745 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
24747 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
24749 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
24751 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
24754 size_t size[3] = {sz, sz2, sz3};
24755 size_t work_dimension = 3;
24758 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
24759 work_dimension = 1;
24761 else if(temp_sz > 0){
24763 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 24764 For default multidimensional global work size, leave the global_work_size vector empty, \ 24765 and set multi_dimensional to true. Setting the global work size based on the values inside \ 24766 the global_work_size vector.");
24770 work_dimension = 1;
24772 else if (temp_sz == 2){
24775 work_dimension = 2;
24782 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24789 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24791 clWaitForEvents(1, &gpuExec);
24793 float *result = (
float *) malloc(typesz);
24794 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24796 v->assign(result, result+sz);
24798 if (typesz2 != typesz or sz != sz2){
24800 result2 = (
double *) malloc(typesz2);
24801 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
24803 v2->assign(result2, result2+sz2);
24807 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
24809 v2->assign(result, result+sz2);
24812 if (typesz3 != typesz or sz != sz3){
24814 result3 = (
double *) malloc(typesz3);
24815 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
24817 v3->assign(result3, result3+sz3);
24821 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
24823 v3->assign(result, result+sz3);
24826 clReleaseCommandQueue (queue);
24827 clReleaseMemObject(buffer);
24828 clReleaseMemObject(buffer2);
24829 clReleaseMemObject(buffer3);
24830 clReleaseEvent(gpuExec);
24836 size_t sz = v.size();
24837 size_t sz2 = v2.size();
24838 size_t sz3 = v3.size();
24839 size_t typesz =
sizeof(double) * sz;
24840 size_t typesz2 =
sizeof(char) * sz2;
24841 size_t typesz3 =
sizeof(char) * sz3;
24842 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
24846 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24853 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24857 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24859 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
24861 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24863 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
24864 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
24865 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
24866 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
24868 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
24870 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
24872 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24875 size_t size[3] = {sz, sz2, sz3};
24876 size_t work_dimension = 3;
24879 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
24880 work_dimension = 1;
24882 else if(temp_sz > 0){
24884 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 24885 For default multidimensional global work size, leave the global_work_size vector empty, \ 24886 and set multi_dimensional to true. Setting the global work size based on the values inside \ 24887 the global_work_size vector.");
24891 work_dimension = 1;
24893 else if (temp_sz == 2){
24896 work_dimension = 2;
24903 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
24910 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
24912 clWaitForEvents(1, &gpuExec);
24914 double *result = (
double *) malloc(typesz);
24915 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
24917 std::vector<double> res = std::vector<double>();
24918 res.assign(result, result+sz);
24920 clReleaseCommandQueue (queue);
24921 clReleaseMemObject(buffer);
24922 clReleaseMemObject(buffer2);
24923 clReleaseMemObject(buffer3);
24924 clReleaseEvent(gpuExec);
24931 size_t sz = v->size();
24932 size_t sz2 = v2.size();
24933 size_t sz3 = v3.size();
24934 size_t typesz =
sizeof(double) * sz;
24935 size_t typesz2 =
sizeof(char) * sz2;
24936 size_t typesz3 =
sizeof(char) * sz3;
24937 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
24941 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
24948 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
24952 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
24954 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
24956 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
24958 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
24959 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
24960 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
24961 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
24963 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
24965 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
24967 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
24970 size_t size[3] = {sz, sz2, sz3};
24971 size_t work_dimension = 3;
24974 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
24975 work_dimension = 1;
24977 else if(temp_sz > 0){
24979 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 24980 For default multidimensional global work size, leave the global_work_size vector empty, \ 24981 and set multi_dimensional to true. Setting the global work size based on the values inside \ 24982 the global_work_size vector.");
24986 work_dimension = 1;
24988 else if (temp_sz == 2){
24991 work_dimension = 2;
24998 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25005 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25007 clWaitForEvents(1, &gpuExec);
25009 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25011 clWaitForEvents(1, &gpuExec);
25013 double *result = (
double *) malloc(typesz);
25014 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25016 v->assign(result, result+sz);
25018 clReleaseCommandQueue (queue);
25019 clReleaseMemObject(buffer);
25020 clReleaseMemObject(buffer2);
25021 clReleaseMemObject(buffer3);
25022 clReleaseEvent(gpuExec);
25027 size_t sz = v->size();
25028 size_t sz2 = v2->size();
25029 size_t sz3 = v3.size();
25030 size_t typesz =
sizeof(double) * sz;
25031 size_t typesz2 =
sizeof(char) * sz2;
25032 size_t typesz3 =
sizeof(char) * sz3;
25033 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
25037 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25044 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25048 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25050 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
25052 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
25054 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
25055 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
25056 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
25057 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
25059 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
25061 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
25063 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
25066 size_t size[3] = {sz, sz2, sz3};
25067 size_t work_dimension = 3;
25070 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
25071 work_dimension = 1;
25073 else if(temp_sz > 0){
25075 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 25076 For default multidimensional global work size, leave the global_work_size vector empty, \ 25077 and set multi_dimensional to true. Setting the global work size based on the values inside \ 25078 the global_work_size vector.");
25082 work_dimension = 1;
25084 else if (temp_sz == 2){
25087 work_dimension = 2;
25094 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25101 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25103 clWaitForEvents(1, &gpuExec);
25105 double *result = (
double *) malloc(typesz);
25106 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25108 v->assign(result, result+sz);
25110 if (typesz2 != typesz or sz != sz2){
25112 result2 = (
char *) malloc(typesz2);
25113 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
25115 v2->assign(result2, result2+sz2);
25119 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
25121 v2->assign(result, result+sz2);
25124 clReleaseCommandQueue (queue);
25125 clReleaseMemObject(buffer);
25126 clReleaseMemObject(buffer2);
25127 clReleaseMemObject(buffer3);
25128 clReleaseEvent(gpuExec);
25133 size_t sz = v->size();
25134 size_t sz2 = v2->size();
25135 size_t sz3 = v3->size();
25136 size_t typesz =
sizeof(double) * sz;
25137 size_t typesz2 =
sizeof(char) * sz2;
25138 size_t typesz3 =
sizeof(char) * sz3;
25139 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
25143 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25150 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25154 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25156 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
25158 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
25160 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
25161 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
25162 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
25163 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
25165 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
25167 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
25169 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
25172 size_t size[3] = {sz, sz2, sz3};
25173 size_t work_dimension = 3;
25176 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
25177 work_dimension = 1;
25179 else if(temp_sz > 0){
25181 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 25182 For default multidimensional global work size, leave the global_work_size vector empty, \ 25183 and set multi_dimensional to true. Setting the global work size based on the values inside \ 25184 the global_work_size vector.");
25188 work_dimension = 1;
25190 else if (temp_sz == 2){
25193 work_dimension = 2;
25200 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25207 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25209 clWaitForEvents(1, &gpuExec);
25211 double *result = (
double *) malloc(typesz);
25212 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25214 v->assign(result, result+sz);
25216 if (typesz2 != typesz or sz != sz2){
25218 result2 = (
char *) malloc(typesz2);
25219 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
25221 v2->assign(result2, result2+sz2);
25225 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
25227 v2->assign(result, result+sz2);
25230 if (typesz3 != typesz or sz != sz3){
25232 result3 = (
char *) malloc(typesz3);
25233 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
25235 v3->assign(result3, result3+sz3);
25239 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
25241 v3->assign(result, result+sz3);
25244 clReleaseCommandQueue (queue);
25245 clReleaseMemObject(buffer);
25246 clReleaseMemObject(buffer2);
25247 clReleaseMemObject(buffer3);
25248 clReleaseEvent(gpuExec);
25254 size_t sz = v.size();
25255 size_t sz2 = v2.size();
25256 size_t sz3 = v3.size();
25257 size_t typesz =
sizeof(double) * sz;
25258 size_t typesz2 =
sizeof(char) * sz2;
25259 size_t typesz3 =
sizeof(int) * sz3;
25260 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
25264 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25271 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25275 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25277 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
25279 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
25281 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
25282 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
25283 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
25284 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
25286 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
25288 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
25290 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
25293 size_t size[3] = {sz, sz2, sz3};
25294 size_t work_dimension = 3;
25297 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
25298 work_dimension = 1;
25300 else if(temp_sz > 0){
25302 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 25303 For default multidimensional global work size, leave the global_work_size vector empty, \ 25304 and set multi_dimensional to true. Setting the global work size based on the values inside \ 25305 the global_work_size vector.");
25309 work_dimension = 1;
25311 else if (temp_sz == 2){
25314 work_dimension = 2;
25321 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25328 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25330 clWaitForEvents(1, &gpuExec);
25332 double *result = (
double *) malloc(typesz);
25333 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25335 std::vector<double> res = std::vector<double>();
25336 res.assign(result, result+sz);
25338 clReleaseCommandQueue (queue);
25339 clReleaseMemObject(buffer);
25340 clReleaseMemObject(buffer2);
25341 clReleaseMemObject(buffer3);
25342 clReleaseEvent(gpuExec);
25349 size_t sz = v->size();
25350 size_t sz2 = v2.size();
25351 size_t sz3 = v3.size();
25352 size_t typesz =
sizeof(double) * sz;
25353 size_t typesz2 =
sizeof(char) * sz2;
25354 size_t typesz3 =
sizeof(int) * sz3;
25355 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
25359 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25366 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25370 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25372 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
25374 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
25376 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
25377 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
25378 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
25379 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
25381 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
25383 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
25385 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
25388 size_t size[3] = {sz, sz2, sz3};
25389 size_t work_dimension = 3;
25392 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
25393 work_dimension = 1;
25395 else if(temp_sz > 0){
25397 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 25398 For default multidimensional global work size, leave the global_work_size vector empty, \ 25399 and set multi_dimensional to true. Setting the global work size based on the values inside \ 25400 the global_work_size vector.");
25404 work_dimension = 1;
25406 else if (temp_sz == 2){
25409 work_dimension = 2;
25416 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25423 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25425 clWaitForEvents(1, &gpuExec);
25427 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25429 clWaitForEvents(1, &gpuExec);
25431 double *result = (
double *) malloc(typesz);
25432 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25434 v->assign(result, result+sz);
25436 clReleaseCommandQueue (queue);
25437 clReleaseMemObject(buffer);
25438 clReleaseMemObject(buffer2);
25439 clReleaseMemObject(buffer3);
25440 clReleaseEvent(gpuExec);
25445 size_t sz = v->size();
25446 size_t sz2 = v2->size();
25447 size_t sz3 = v3.size();
25448 size_t typesz =
sizeof(double) * sz;
25449 size_t typesz2 =
sizeof(char) * sz2;
25450 size_t typesz3 =
sizeof(int) * sz3;
25451 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
25455 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25462 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25466 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25468 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
25470 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
25472 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
25473 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
25474 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
25475 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
25477 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
25479 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
25481 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
25484 size_t size[3] = {sz, sz2, sz3};
25485 size_t work_dimension = 3;
25488 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
25489 work_dimension = 1;
25491 else if(temp_sz > 0){
25493 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 25494 For default multidimensional global work size, leave the global_work_size vector empty, \ 25495 and set multi_dimensional to true. Setting the global work size based on the values inside \ 25496 the global_work_size vector.");
25500 work_dimension = 1;
25502 else if (temp_sz == 2){
25505 work_dimension = 2;
25512 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25519 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25521 clWaitForEvents(1, &gpuExec);
25523 double *result = (
double *) malloc(typesz);
25524 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25526 v->assign(result, result+sz);
25528 if (typesz2 != typesz or sz != sz2){
25530 result2 = (
char *) malloc(typesz2);
25531 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
25533 v2->assign(result2, result2+sz2);
25537 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
25539 v2->assign(result, result+sz2);
25542 clReleaseCommandQueue (queue);
25543 clReleaseMemObject(buffer);
25544 clReleaseMemObject(buffer2);
25545 clReleaseMemObject(buffer3);
25546 clReleaseEvent(gpuExec);
25551 size_t sz = v->size();
25552 size_t sz2 = v2->size();
25553 size_t sz3 = v3->size();
25554 size_t typesz =
sizeof(double) * sz;
25555 size_t typesz2 =
sizeof(char) * sz2;
25556 size_t typesz3 =
sizeof(int) * sz3;
25557 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
25561 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25568 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25572 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25574 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
25576 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
25578 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
25579 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
25580 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
25581 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
25583 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
25585 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
25587 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
25590 size_t size[3] = {sz, sz2, sz3};
25591 size_t work_dimension = 3;
25594 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
25595 work_dimension = 1;
25597 else if(temp_sz > 0){
25599 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 25600 For default multidimensional global work size, leave the global_work_size vector empty, \ 25601 and set multi_dimensional to true. Setting the global work size based on the values inside \ 25602 the global_work_size vector.");
25606 work_dimension = 1;
25608 else if (temp_sz == 2){
25611 work_dimension = 2;
25618 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25625 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25627 clWaitForEvents(1, &gpuExec);
25629 double *result = (
double *) malloc(typesz);
25630 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25632 v->assign(result, result+sz);
25634 if (typesz2 != typesz or sz != sz2){
25636 result2 = (
char *) malloc(typesz2);
25637 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
25639 v2->assign(result2, result2+sz2);
25643 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
25645 v2->assign(result, result+sz2);
25648 if (typesz3 != typesz or sz != sz3){
25650 result3 = (
int *) malloc(typesz3);
25651 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
25653 v3->assign(result3, result3+sz3);
// Read-back of the third kernel argument into v3 when the `result` scratch
// buffer is reused (presumably the else branch of the size check above).
// The data must come from buffer3 (bound to kernel argument 2 and filled from
// v3), not buffer2; reading typesz2 bytes could also overrun `result`, which
// only holds typesz bytes. The elements are ints, so the double* scratch
// buffer is viewed as int* before copying sz3 elements.
25657 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
25659 v3->assign(reinterpret_cast<int *>(result), reinterpret_cast<int *>(result)+sz3);
25662 clReleaseCommandQueue (queue);
25663 clReleaseMemObject(buffer);
25664 clReleaseMemObject(buffer2);
25665 clReleaseMemObject(buffer3);
25666 clReleaseEvent(gpuExec);
25672 size_t sz = v.size();
25673 size_t sz2 = v2.size();
25674 size_t sz3 = v3.size();
25675 size_t typesz =
sizeof(double) * sz;
25676 size_t typesz2 =
sizeof(char) * sz2;
25677 size_t typesz3 =
sizeof(float) * sz3;
25678 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
25682 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25689 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25693 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25695 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
25697 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
25699 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
25700 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
25701 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
25702 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
25704 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
25706 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
25708 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
25711 size_t size[3] = {sz, sz2, sz3};
25712 size_t work_dimension = 3;
25715 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
25716 work_dimension = 1;
25718 else if(temp_sz > 0){
25720 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 25721 For default multidimensional global work size, leave the global_work_size vector empty, \ 25722 and set multi_dimensional to true. Setting the global work size based on the values inside \ 25723 the global_work_size vector.");
25727 work_dimension = 1;
25729 else if (temp_sz == 2){
25732 work_dimension = 2;
25739 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25746 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25748 clWaitForEvents(1, &gpuExec);
25750 double *result = (
double *) malloc(typesz);
25751 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25753 std::vector<double> res = std::vector<double>();
25754 res.assign(result, result+sz);
25756 clReleaseCommandQueue (queue);
25757 clReleaseMemObject(buffer);
25758 clReleaseMemObject(buffer2);
25759 clReleaseMemObject(buffer3);
25760 clReleaseEvent(gpuExec);
25767 size_t sz = v->size();
25768 size_t sz2 = v2.size();
25769 size_t sz3 = v3.size();
25770 size_t typesz =
sizeof(double) * sz;
25771 size_t typesz2 =
sizeof(char) * sz2;
25772 size_t typesz3 =
sizeof(float) * sz3;
25773 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
25777 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25784 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25788 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25790 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
25792 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
25794 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
25795 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
25796 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
25797 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
25799 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
25801 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
25803 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
25806 size_t size[3] = {sz, sz2, sz3};
25807 size_t work_dimension = 3;
25810 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
25811 work_dimension = 1;
25813 else if(temp_sz > 0){
25815 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 25816 For default multidimensional global work size, leave the global_work_size vector empty, \ 25817 and set multi_dimensional to true. Setting the global work size based on the values inside \ 25818 the global_work_size vector.");
25822 work_dimension = 1;
25824 else if (temp_sz == 2){
25827 work_dimension = 2;
25834 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25841 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25843 clWaitForEvents(1, &gpuExec);
25845 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25847 clWaitForEvents(1, &gpuExec);
25849 double *result = (
double *) malloc(typesz);
25850 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25852 v->assign(result, result+sz);
25854 clReleaseCommandQueue (queue);
25855 clReleaseMemObject(buffer);
25856 clReleaseMemObject(buffer2);
25857 clReleaseMemObject(buffer3);
25858 clReleaseEvent(gpuExec);
25863 size_t sz = v->size();
25864 size_t sz2 = v2->size();
25865 size_t sz3 = v3.size();
25866 size_t typesz =
sizeof(double) * sz;
25867 size_t typesz2 =
sizeof(char) * sz2;
25868 size_t typesz3 =
sizeof(float) * sz3;
25869 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
25873 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25880 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25884 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25886 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
25888 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
25890 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
25891 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
25892 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
25893 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
25895 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
25897 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
25899 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
25902 size_t size[3] = {sz, sz2, sz3};
25903 size_t work_dimension = 3;
25906 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
25907 work_dimension = 1;
25909 else if(temp_sz > 0){
25911 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 25912 For default multidimensional global work size, leave the global_work_size vector empty, \ 25913 and set multi_dimensional to true. Setting the global work size based on the values inside \ 25914 the global_work_size vector.");
25918 work_dimension = 1;
25920 else if (temp_sz == 2){
25923 work_dimension = 2;
25930 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
25937 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
25939 clWaitForEvents(1, &gpuExec);
25941 double *result = (
double *) malloc(typesz);
25942 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
25944 v->assign(result, result+sz);
25946 if (typesz2 != typesz or sz != sz2){
25948 result2 = (
char *) malloc(typesz2);
25949 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
25951 v2->assign(result2, result2+sz2);
25955 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
25957 v2->assign(result, result+sz2);
25960 clReleaseCommandQueue (queue);
25961 clReleaseMemObject(buffer);
25962 clReleaseMemObject(buffer2);
25963 clReleaseMemObject(buffer3);
25964 clReleaseEvent(gpuExec);
25969 size_t sz = v->size();
25970 size_t sz2 = v2->size();
25971 size_t sz3 = v3->size();
25972 size_t typesz =
sizeof(double) * sz;
25973 size_t typesz2 =
sizeof(char) * sz2;
25974 size_t typesz3 =
sizeof(float) * sz3;
25975 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
25979 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
25986 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
25990 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
25992 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
25994 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
25996 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
25997 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
25998 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
25999 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
26001 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
26003 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
26005 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
26008 size_t size[3] = {sz, sz2, sz3};
26009 size_t work_dimension = 3;
26012 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
26013 work_dimension = 1;
26015 else if(temp_sz > 0){
26017 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 26018 For default multidimensional global work size, leave the global_work_size vector empty, \ 26019 and set multi_dimensional to true. Setting the global work size based on the values inside \ 26020 the global_work_size vector.");
26024 work_dimension = 1;
26026 else if (temp_sz == 2){
26029 work_dimension = 2;
26036 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26043 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26045 clWaitForEvents(1, &gpuExec);
26047 double *result = (
double *) malloc(typesz);
26048 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26050 v->assign(result, result+sz);
26052 if (typesz2 != typesz or sz != sz2){
26054 result2 = (
char *) malloc(typesz2);
26055 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
26057 v2->assign(result2, result2+sz2);
26061 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
26063 v2->assign(result, result+sz2);
26066 if (typesz3 != typesz or sz != sz3){
26068 result3 = (
float *) malloc(typesz3);
26069 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
26071 v3->assign(result3, result3+sz3);
// Read-back of the third kernel argument into v3 when the `result` scratch
// buffer is reused (presumably the else branch of the size check above).
// The data must come from buffer3 (bound to kernel argument 2 and filled from
// v3), not buffer2; reading typesz2 bytes could also overrun `result`, which
// only holds typesz bytes. The elements are floats, so the double* scratch
// buffer is viewed as float* before copying sz3 elements.
26075 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
26077 v3->assign(reinterpret_cast<float *>(result), reinterpret_cast<float *>(result)+sz3);
26080 clReleaseCommandQueue (queue);
26081 clReleaseMemObject(buffer);
26082 clReleaseMemObject(buffer2);
26083 clReleaseMemObject(buffer3);
26084 clReleaseEvent(gpuExec);
26090 size_t sz = v.size();
26091 size_t sz2 = v2.size();
26092 size_t sz3 = v3.size();
26093 size_t typesz =
sizeof(double) * sz;
26094 size_t typesz2 =
sizeof(char) * sz2;
26095 size_t typesz3 =
sizeof(double) * sz3;
26096 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
26100 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26107 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26111 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26113 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
26115 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
26117 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
26118 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
26119 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
26120 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
26122 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
26124 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
26126 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
26129 size_t size[3] = {sz, sz2, sz3};
26130 size_t work_dimension = 3;
26133 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
26134 work_dimension = 1;
26136 else if(temp_sz > 0){
26138 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 26139 For default multidimensional global work size, leave the global_work_size vector empty, \ 26140 and set multi_dimensional to true. Setting the global work size based on the values inside \ 26141 the global_work_size vector.");
26145 work_dimension = 1;
26147 else if (temp_sz == 2){
26150 work_dimension = 2;
26157 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26164 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26166 clWaitForEvents(1, &gpuExec);
26168 double *result = (
double *) malloc(typesz);
26169 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26171 std::vector<double> res = std::vector<double>();
26172 res.assign(result, result+sz);
26174 clReleaseCommandQueue (queue);
26175 clReleaseMemObject(buffer);
26176 clReleaseMemObject(buffer2);
26177 clReleaseMemObject(buffer3);
26178 clReleaseEvent(gpuExec);
26185 size_t sz = v->size();
26186 size_t sz2 = v2.size();
26187 size_t sz3 = v3.size();
26188 size_t typesz =
sizeof(double) * sz;
26189 size_t typesz2 =
sizeof(char) * sz2;
26190 size_t typesz3 =
sizeof(double) * sz3;
26191 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
26195 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26202 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26206 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26208 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
26210 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
26212 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
26213 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
26214 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
26215 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
26217 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
26219 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
26221 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
26224 size_t size[3] = {sz, sz2, sz3};
26225 size_t work_dimension = 3;
26228 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
26229 work_dimension = 1;
26231 else if(temp_sz > 0){
26233 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 26234 For default multidimensional global work size, leave the global_work_size vector empty, \ 26235 and set multi_dimensional to true. Setting the global work size based on the values inside \ 26236 the global_work_size vector.");
26240 work_dimension = 1;
26242 else if (temp_sz == 2){
26245 work_dimension = 2;
26252 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26259 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26261 clWaitForEvents(1, &gpuExec);
26263 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26265 clWaitForEvents(1, &gpuExec);
26267 double *result = (
double *) malloc(typesz);
26268 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26270 v->assign(result, result+sz);
26272 clReleaseCommandQueue (queue);
26273 clReleaseMemObject(buffer);
26274 clReleaseMemObject(buffer2);
26275 clReleaseMemObject(buffer3);
26276 clReleaseEvent(gpuExec);
26281 size_t sz = v->size();
26282 size_t sz2 = v2->size();
26283 size_t sz3 = v3.size();
26284 size_t typesz =
sizeof(double) * sz;
26285 size_t typesz2 =
sizeof(char) * sz2;
26286 size_t typesz3 =
sizeof(double) * sz3;
26287 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
26291 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26298 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26302 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26304 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
26306 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
26308 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
26309 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
26310 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
26311 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
26313 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
26315 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
26317 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
26320 size_t size[3] = {sz, sz2, sz3};
26321 size_t work_dimension = 3;
26324 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
26325 work_dimension = 1;
26327 else if(temp_sz > 0){
26329 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 26330 For default multidimensional global work size, leave the global_work_size vector empty, \ 26331 and set multi_dimensional to true. Setting the global work size based on the values inside \ 26332 the global_work_size vector.");
26336 work_dimension = 1;
26338 else if (temp_sz == 2){
26341 work_dimension = 2;
26348 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26355 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26357 clWaitForEvents(1, &gpuExec);
26359 double *result = (
double *) malloc(typesz);
26360 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26362 v->assign(result, result+sz);
26364 if (typesz2 != typesz or sz != sz2){
26366 result2 = (
char *) malloc(typesz2);
26367 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
26369 v2->assign(result2, result2+sz2);
26373 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
26375 v2->assign(result, result+sz2);
26378 clReleaseCommandQueue (queue);
26379 clReleaseMemObject(buffer);
26380 clReleaseMemObject(buffer2);
26381 clReleaseMemObject(buffer3);
26382 clReleaseEvent(gpuExec);
26387 size_t sz = v->size();
26388 size_t sz2 = v2->size();
26389 size_t sz3 = v3->size();
26390 size_t typesz =
sizeof(double) * sz;
26391 size_t typesz2 =
sizeof(char) * sz2;
26392 size_t typesz3 =
sizeof(double) * sz3;
26393 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
26397 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26404 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26408 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26410 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
26412 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
26414 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
26415 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
26416 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
26417 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
26419 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
26421 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
26423 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
26426 size_t size[3] = {sz, sz2, sz3};
26427 size_t work_dimension = 3;
26430 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
26431 work_dimension = 1;
26433 else if(temp_sz > 0){
26435 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 26436 For default multidimensional global work size, leave the global_work_size vector empty, \ 26437 and set multi_dimensional to true. Setting the global work size based on the values inside \ 26438 the global_work_size vector.");
26442 work_dimension = 1;
26444 else if (temp_sz == 2){
26447 work_dimension = 2;
26454 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26461 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26463 clWaitForEvents(1, &gpuExec);
26465 double *result = (
double *) malloc(typesz);
26466 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26468 v->assign(result, result+sz);
26470 if (typesz2 != typesz or sz != sz2){
26472 result2 = (
char *) malloc(typesz2);
26473 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
26475 v2->assign(result2, result2+sz2);
26479 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
26481 v2->assign(result, result+sz2);
26484 if (typesz3 != typesz or sz != sz3){
26486 result3 = (
double *) malloc(typesz3);
26487 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
26489 v3->assign(result3, result3+sz3);
// Read-back of the third kernel argument into v3 when the `result` scratch
// buffer is reused (presumably the else branch of the size check above, i.e.
// typesz3 == typesz and sz == sz3). The data must come from buffer3 (bound to
// kernel argument 2 and filled from v3), not buffer2; reading typesz2 bytes
// could also overrun `result`, which only holds typesz bytes.
26493 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
26495 v3->assign(result, result+sz3);
26498 clReleaseCommandQueue (queue);
26499 clReleaseMemObject(buffer);
26500 clReleaseMemObject(buffer2);
26501 clReleaseMemObject(buffer3);
26502 clReleaseEvent(gpuExec);
26508 size_t sz = v.size();
26509 size_t sz2 = v2.size();
26510 size_t sz3 = v3.size();
26511 size_t typesz =
sizeof(double) * sz;
26512 size_t typesz2 =
sizeof(int) * sz2;
26513 size_t typesz3 =
sizeof(char) * sz3;
26514 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
26518 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26525 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26529 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26531 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
26533 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
26535 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
26536 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
26537 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
26538 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
26540 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
26542 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
26544 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
26547 size_t size[3] = {sz, sz2, sz3};
26548 size_t work_dimension = 3;
26551 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
26552 work_dimension = 1;
26554 else if(temp_sz > 0){
26556 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 26557 For default multidimensional global work size, leave the global_work_size vector empty, \ 26558 and set multi_dimensional to true. Setting the global work size based on the values inside \ 26559 the global_work_size vector.");
26563 work_dimension = 1;
26565 else if (temp_sz == 2){
26568 work_dimension = 2;
26575 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26582 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26584 clWaitForEvents(1, &gpuExec);
26586 double *result = (
double *) malloc(typesz);
26587 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26589 std::vector<double> res = std::vector<double>();
26590 res.assign(result, result+sz);
26592 clReleaseCommandQueue (queue);
26593 clReleaseMemObject(buffer);
26594 clReleaseMemObject(buffer2);
26595 clReleaseMemObject(buffer3);
26596 clReleaseEvent(gpuExec);
26603 size_t sz = v->size();
26604 size_t sz2 = v2.size();
26605 size_t sz3 = v3.size();
26606 size_t typesz =
sizeof(double) * sz;
26607 size_t typesz2 =
sizeof(int) * sz2;
26608 size_t typesz3 =
sizeof(char) * sz3;
26609 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
26613 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26620 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26624 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26626 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
26628 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
26630 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
26631 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
26632 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
26633 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
26635 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
26637 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
26639 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
26642 size_t size[3] = {sz, sz2, sz3};
26643 size_t work_dimension = 3;
26646 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
26647 work_dimension = 1;
26649 else if(temp_sz > 0){
26651 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 26652 For default multidimensional global work size, leave the global_work_size vector empty, \ 26653 and set multi_dimensional to true. Setting the global work size based on the values inside \ 26654 the global_work_size vector.");
26658 work_dimension = 1;
26660 else if (temp_sz == 2){
26663 work_dimension = 2;
26670 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26677 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26679 clWaitForEvents(1, &gpuExec);
26681 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26683 clWaitForEvents(1, &gpuExec);
26685 double *result = (
double *) malloc(typesz);
26686 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26688 v->assign(result, result+sz);
26690 clReleaseCommandQueue (queue);
26691 clReleaseMemObject(buffer);
26692 clReleaseMemObject(buffer2);
26693 clReleaseMemObject(buffer3);
26694 clReleaseEvent(gpuExec);
26699 size_t sz = v->size();
26700 size_t sz2 = v2->size();
26701 size_t sz3 = v3.size();
26702 size_t typesz =
sizeof(double) * sz;
26703 size_t typesz2 =
sizeof(int) * sz2;
26704 size_t typesz3 =
sizeof(char) * sz3;
26705 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
26709 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26716 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26720 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26722 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
26724 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
26726 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
26727 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
26728 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
26729 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
26731 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
26733 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
26735 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
26738 size_t size[3] = {sz, sz2, sz3};
26739 size_t work_dimension = 3;
26742 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
26743 work_dimension = 1;
26745 else if(temp_sz > 0){
26747 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 26748 For default multidimensional global work size, leave the global_work_size vector empty, \ 26749 and set multi_dimensional to true. Setting the global work size based on the values inside \ 26750 the global_work_size vector.");
26754 work_dimension = 1;
26756 else if (temp_sz == 2){
26759 work_dimension = 2;
26766 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26773 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26775 clWaitForEvents(1, &gpuExec);
26777 double *result = (
double *) malloc(typesz);
26778 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26780 v->assign(result, result+sz);
26782 if (typesz2 != typesz or sz != sz2){
26784 result2 = (
int *) malloc(typesz2);
26785 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
26787 v2->assign(result2, result2+sz2);
26791 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
26793 v2->assign(result, result+sz2);
26796 clReleaseCommandQueue (queue);
26797 clReleaseMemObject(buffer);
26798 clReleaseMemObject(buffer2);
26799 clReleaseMemObject(buffer3);
26800 clReleaseEvent(gpuExec);
26805 size_t sz = v->size();
26806 size_t sz2 = v2->size();
26807 size_t sz3 = v3->size();
26808 size_t typesz =
sizeof(double) * sz;
26809 size_t typesz2 =
sizeof(int) * sz2;
26810 size_t typesz3 =
sizeof(char) * sz3;
26811 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
26815 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26822 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26826 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26828 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
26830 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
26832 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
26833 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
26834 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
26835 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
26837 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
26839 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
26841 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
26844 size_t size[3] = {sz, sz2, sz3};
26845 size_t work_dimension = 3;
26848 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
26849 work_dimension = 1;
26851 else if(temp_sz > 0){
26853 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 26854 For default multidimensional global work size, leave the global_work_size vector empty, \ 26855 and set multi_dimensional to true. Setting the global work size based on the values inside \ 26856 the global_work_size vector.");
26860 work_dimension = 1;
26862 else if (temp_sz == 2){
26865 work_dimension = 2;
26872 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
26879 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
26881 clWaitForEvents(1, &gpuExec);
26883 double *result = (
double *) malloc(typesz);
26884 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
26886 v->assign(result, result+sz);
26888 if (typesz2 != typesz or sz != sz2){
26890 result2 = (
int *) malloc(typesz2);
26891 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
26893 v2->assign(result2, result2+sz2);
26897 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
26899 v2->assign(result, result+sz2);
26902 if (typesz3 != typesz or sz != sz3){
26904 result3 = (
char *) malloc(typesz3);
26905 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
26907 v3->assign(result3, result3+sz3);
26911 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
26913 v3->assign(result, result+sz3);
26916 clReleaseCommandQueue (queue);
26917 clReleaseMemObject(buffer);
26918 clReleaseMemObject(buffer2);
26919 clReleaseMemObject(buffer3);
26920 clReleaseEvent(gpuExec);
26926 size_t sz = v.size();
26927 size_t sz2 = v2.size();
26928 size_t sz3 = v3.size();
26929 size_t typesz =
sizeof(double) * sz;
26930 size_t typesz2 =
sizeof(int) * sz2;
26931 size_t typesz3 =
sizeof(int) * sz3;
26932 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
26936 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
26943 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
26947 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
26949 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
26951 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
26953 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
26954 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
26955 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
26956 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
26958 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
26960 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
26962 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
26965 size_t size[3] = {sz, sz2, sz3};
26966 size_t work_dimension = 3;
26969 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
26970 work_dimension = 1;
26972 else if(temp_sz > 0){
26974 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 26975 For default multidimensional global work size, leave the global_work_size vector empty, \ 26976 and set multi_dimensional to true. Setting the global work size based on the values inside \ 26977 the global_work_size vector.");
26981 work_dimension = 1;
26983 else if (temp_sz == 2){
26986 work_dimension = 2;
26993 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27000 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27002 clWaitForEvents(1, &gpuExec);
27004 double *result = (
double *) malloc(typesz);
27005 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27007 std::vector<double> res = std::vector<double>();
27008 res.assign(result, result+sz);
27010 clReleaseCommandQueue (queue);
27011 clReleaseMemObject(buffer);
27012 clReleaseMemObject(buffer2);
27013 clReleaseMemObject(buffer3);
27014 clReleaseEvent(gpuExec);
27021 size_t sz = v->size();
27022 size_t sz2 = v2.size();
27023 size_t sz3 = v3.size();
27024 size_t typesz =
sizeof(double) * sz;
27025 size_t typesz2 =
sizeof(int) * sz2;
27026 size_t typesz3 =
sizeof(int) * sz3;
27027 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
27031 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27038 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27042 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27044 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
27046 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
27048 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
27049 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
27050 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
27051 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
27053 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27055 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
27057 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
27060 size_t size[3] = {sz, sz2, sz3};
27061 size_t work_dimension = 3;
27064 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
27065 work_dimension = 1;
27067 else if(temp_sz > 0){
27069 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 27070 For default multidimensional global work size, leave the global_work_size vector empty, \ 27071 and set multi_dimensional to true. Setting the global work size based on the values inside \ 27072 the global_work_size vector.");
27076 work_dimension = 1;
27078 else if (temp_sz == 2){
27081 work_dimension = 2;
27088 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27095 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27097 clWaitForEvents(1, &gpuExec);
27099 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27101 clWaitForEvents(1, &gpuExec);
27103 double *result = (
double *) malloc(typesz);
27104 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27106 v->assign(result, result+sz);
27108 clReleaseCommandQueue (queue);
27109 clReleaseMemObject(buffer);
27110 clReleaseMemObject(buffer2);
27111 clReleaseMemObject(buffer3);
27112 clReleaseEvent(gpuExec);
27117 size_t sz = v->size();
27118 size_t sz2 = v2->size();
27119 size_t sz3 = v3.size();
27120 size_t typesz =
sizeof(double) * sz;
27121 size_t typesz2 =
sizeof(int) * sz2;
27122 size_t typesz3 =
sizeof(int) * sz3;
27123 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
27127 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27134 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27138 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27140 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
27142 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
27144 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
27145 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
27146 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
27147 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
27149 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27151 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
27153 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
27156 size_t size[3] = {sz, sz2, sz3};
27157 size_t work_dimension = 3;
27160 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
27161 work_dimension = 1;
27163 else if(temp_sz > 0){
27165 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 27166 For default multidimensional global work size, leave the global_work_size vector empty, \ 27167 and set multi_dimensional to true. Setting the global work size based on the values inside \ 27168 the global_work_size vector.");
27172 work_dimension = 1;
27174 else if (temp_sz == 2){
27177 work_dimension = 2;
27184 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27191 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27193 clWaitForEvents(1, &gpuExec);
27195 double *result = (
double *) malloc(typesz);
27196 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27198 v->assign(result, result+sz);
27200 if (typesz2 != typesz or sz != sz2){
27202 result2 = (
int *) malloc(typesz2);
27203 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
27205 v2->assign(result2, result2+sz2);
27209 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
27211 v2->assign(result, result+sz2);
27214 clReleaseCommandQueue (queue);
27215 clReleaseMemObject(buffer);
27216 clReleaseMemObject(buffer2);
27217 clReleaseMemObject(buffer3);
27218 clReleaseEvent(gpuExec);
27223 size_t sz = v->size();
27224 size_t sz2 = v2->size();
27225 size_t sz3 = v3->size();
27226 size_t typesz =
sizeof(double) * sz;
27227 size_t typesz2 =
sizeof(int) * sz2;
27228 size_t typesz3 =
sizeof(int) * sz3;
27229 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
27233 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27240 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27244 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27246 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
27248 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
27250 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
27251 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
27252 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
27253 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
27255 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27257 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
27259 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
27262 size_t size[3] = {sz, sz2, sz3};
27263 size_t work_dimension = 3;
27266 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
27267 work_dimension = 1;
27269 else if(temp_sz > 0){
27271 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 27272 For default multidimensional global work size, leave the global_work_size vector empty, \ 27273 and set multi_dimensional to true. Setting the global work size based on the values inside \ 27274 the global_work_size vector.");
27278 work_dimension = 1;
27280 else if (temp_sz == 2){
27283 work_dimension = 2;
27290 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27297 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27299 clWaitForEvents(1, &gpuExec);
27301 double *result = (
double *) malloc(typesz);
27302 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27304 v->assign(result, result+sz);
27306 if (typesz2 != typesz or sz != sz2){
27308 result2 = (
int *) malloc(typesz2);
27309 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
27311 v2->assign(result2, result2+sz2);
27315 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
27317 v2->assign(result, result+sz2);
27320 if (typesz3 != typesz or sz != sz3){
27322 result3 = (
int *) malloc(typesz3);
27323 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
27325 v3->assign(result3, result3+sz3);
27329 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
27331 v3->assign(result, result+sz3);
27334 clReleaseCommandQueue (queue);
27335 clReleaseMemObject(buffer);
27336 clReleaseMemObject(buffer2);
27337 clReleaseMemObject(buffer3);
27338 clReleaseEvent(gpuExec);
27344 size_t sz = v.size();
27345 size_t sz2 = v2.size();
27346 size_t sz3 = v3.size();
27347 size_t typesz =
sizeof(double) * sz;
27348 size_t typesz2 =
sizeof(int) * sz2;
27349 size_t typesz3 =
sizeof(float) * sz3;
27350 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
27354 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27361 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27365 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27367 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
27369 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
27371 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
27372 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
27373 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
27374 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
27376 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
27378 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
27380 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
27383 size_t size[3] = {sz, sz2, sz3};
27384 size_t work_dimension = 3;
27387 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
27388 work_dimension = 1;
27390 else if(temp_sz > 0){
27392 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 27393 For default multidimensional global work size, leave the global_work_size vector empty, \ 27394 and set multi_dimensional to true. Setting the global work size based on the values inside \ 27395 the global_work_size vector.");
27399 work_dimension = 1;
27401 else if (temp_sz == 2){
27404 work_dimension = 2;
27411 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27418 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27420 clWaitForEvents(1, &gpuExec);
27422 double *result = (
double *) malloc(typesz);
27423 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27425 std::vector<double> res = std::vector<double>();
27426 res.assign(result, result+sz);
27428 clReleaseCommandQueue (queue);
27429 clReleaseMemObject(buffer);
27430 clReleaseMemObject(buffer2);
27431 clReleaseMemObject(buffer3);
27432 clReleaseEvent(gpuExec);
27439 size_t sz = v->size();
27440 size_t sz2 = v2.size();
27441 size_t sz3 = v3.size();
27442 size_t typesz =
sizeof(double) * sz;
27443 size_t typesz2 =
sizeof(int) * sz2;
27444 size_t typesz3 =
sizeof(float) * sz3;
27445 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
27449 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27456 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27460 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27462 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
27464 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
27466 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
27467 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
27468 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
27469 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
27471 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27473 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
27475 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
27478 size_t size[3] = {sz, sz2, sz3};
27479 size_t work_dimension = 3;
27482 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
27483 work_dimension = 1;
27485 else if(temp_sz > 0){
27487 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 27488 For default multidimensional global work size, leave the global_work_size vector empty, \ 27489 and set multi_dimensional to true. Setting the global work size based on the values inside \ 27490 the global_work_size vector.");
27494 work_dimension = 1;
27496 else if (temp_sz == 2){
27499 work_dimension = 2;
27506 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27513 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27515 clWaitForEvents(1, &gpuExec);
27517 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27519 clWaitForEvents(1, &gpuExec);
27521 double *result = (
double *) malloc(typesz);
27522 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27524 v->assign(result, result+sz);
27526 clReleaseCommandQueue (queue);
27527 clReleaseMemObject(buffer);
27528 clReleaseMemObject(buffer2);
27529 clReleaseMemObject(buffer3);
27530 clReleaseEvent(gpuExec);
27535 size_t sz = v->size();
27536 size_t sz2 = v2->size();
27537 size_t sz3 = v3.size();
27538 size_t typesz =
sizeof(double) * sz;
27539 size_t typesz2 =
sizeof(int) * sz2;
27540 size_t typesz3 =
sizeof(float) * sz3;
27541 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
27545 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27552 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27556 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27558 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
27560 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
27562 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
27563 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
27564 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
27565 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
27567 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27569 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
27571 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
27574 size_t size[3] = {sz, sz2, sz3};
27575 size_t work_dimension = 3;
27578 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
27579 work_dimension = 1;
27581 else if(temp_sz > 0){
27583 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 27584 For default multidimensional global work size, leave the global_work_size vector empty, \ 27585 and set multi_dimensional to true. Setting the global work size based on the values inside \ 27586 the global_work_size vector.");
27590 work_dimension = 1;
27592 else if (temp_sz == 2){
27595 work_dimension = 2;
27602 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27609 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27611 clWaitForEvents(1, &gpuExec);
27613 double *result = (
double *) malloc(typesz);
27614 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27616 v->assign(result, result+sz);
27618 if (typesz2 != typesz or sz != sz2){
27620 result2 = (
int *) malloc(typesz2);
27621 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
27623 v2->assign(result2, result2+sz2);
27627 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
27629 v2->assign(result, result+sz2);
27632 clReleaseCommandQueue (queue);
27633 clReleaseMemObject(buffer);
27634 clReleaseMemObject(buffer2);
27635 clReleaseMemObject(buffer3);
27636 clReleaseEvent(gpuExec);
27641 size_t sz = v->size();
27642 size_t sz2 = v2->size();
27643 size_t sz3 = v3->size();
27644 size_t typesz =
sizeof(double) * sz;
27645 size_t typesz2 =
sizeof(int) * sz2;
27646 size_t typesz3 =
sizeof(float) * sz3;
27647 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
27651 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27658 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
27662 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27664 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
27666 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
27668 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
27669 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
27670 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
27671 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
27673 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27675 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
27677 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
27680 size_t size[3] = {sz, sz2, sz3};
27681 size_t work_dimension = 3;
27684 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
27685 work_dimension = 1;
27687 else if(temp_sz > 0){
27689 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 27690 For default multidimensional global work size, leave the global_work_size vector empty, \ 27691 and set multi_dimensional to true. Setting the global work size based on the values inside \ 27692 the global_work_size vector.");
27696 work_dimension = 1;
27698 else if (temp_sz == 2){
27701 work_dimension = 2;
27708 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
27715 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27717 clWaitForEvents(1, &gpuExec);
27719 double *result = (
double *) malloc(typesz);
27720 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
27722 v->assign(result, result+sz);
27724 if (typesz2 != typesz or sz != sz2){
27726 result2 = (
int *) malloc(typesz2);
27727 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
27729 v2->assign(result2, result2+sz2);
27733 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
27735 v2->assign(result, result+sz2);
27738 if (typesz3 != typesz or sz != sz3){
27740 result3 = (
float *) malloc(typesz3);
27741 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
27743 v3->assign(result3, result3+sz3);
27747 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
27749 v3->assign(result, result+sz3);
27752 clReleaseCommandQueue (queue);
27753 clReleaseMemObject(buffer);
27754 clReleaseMemObject(buffer2);
27755 clReleaseMemObject(buffer3);
27756 clReleaseEvent(gpuExec);
// Body fragment of an OpenCL dispatch routine; the enclosing signature, braces and
// some statements are not visible in this excerpt.
// Visible flow: size three device buffers from v (double), v2 (int) and v3 (double),
// upload them, enqueue `kernel`, then copy buffer 0 back into a local vector `res`.
27762 size_t sz = v.size();
27763 size_t sz2 = v2.size();
27764 size_t sz3 = v3.size();
// Byte sizes of the three host vectors.
27765 size_t typesz =
sizeof(double) * sz;
27766 size_t typesz2 =
sizeof(int) * sz2;
27767 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size; 0 when no params object was passed.
27768 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// The conditions guarding the two warnings below are not visible in this excerpt.
27772 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27779 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector, sized in bytes.
// NOTE(review): buffer2/buffer3 are created CL_MEM_WRITE_ONLY but are filled from host
// data below and never read back in the visible lines; in OpenCL that flag restricts
// kernel access to writes - confirm CL_MEM_READ_ONLY was not intended.
27783 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27785 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
27787 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
27789 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
27790 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
27791 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first listed device, with no properties (NULL).
27792 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
// NOTE(review): &v[0] presumes non-empty vectors - confirm callers guarantee this.
27794 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
27796 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
27798 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per vector length.
27801 size_t size[3] = {sz, sz2, sz3};
27802 size_t work_dimension = 3;
// Use a 1-D range when there is no params object, or when multi_dimensional is unset
// and temp_sz is zero.
// NOTE(review): temp_sz was initialised from buffers_size above while the warnings in
// this chain mention global_work_size - confirm it is reassigned in lines not shown.
27805 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
27806 work_dimension = 1;
27808 else if(temp_sz > 0){
27810 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 27811 For default multidimensional global work size, leave the global_work_size vector empty, \ 27812 and set multi_dimensional to true. Setting the global work size based on the values inside \ 27813 the global_work_size vector.");
// Only the work_dimension assignments of the following branches are visible here.
27817 work_dimension = 1;
27819 else if (temp_sz == 2){
27822 work_dimension = 2;
27829 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with `size` as the global work size and no explicit local size; gpuExec
// receives the completion event.
27836 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel has finished.
27838 clWaitForEvents(1, &gpuExec);
// Host staging area for the contents of buffer 0.
// NOTE(review): the malloc result is used unchecked and no free(result) is visible in
// this excerpt - confirm it is released.
27840 double *result = (
double *) malloc(typesz);
27841 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the sz doubles read back into the local result vector.
27843 std::vector<double> res = std::vector<double>();
27844 res.assign(result, result+sz);
// Release the OpenCL objects created above.
27846 clReleaseCommandQueue (queue);
27847 clReleaseMemObject(buffer);
27848 clReleaseMemObject(buffer2);
27849 clReleaseMemObject(buffer3);
27850 clReleaseEvent(gpuExec);
// Body fragment of an OpenCL dispatch routine; the enclosing signature, braces and
// some statements are not visible in this excerpt.
// Visible flow: v is accessed through a pointer and is overwritten with the contents
// of buffer 0 after the kernel ran; v2 (int) and v3 (double) are only uploaded.
27857 size_t sz = v->size();
27858 size_t sz2 = v2.size();
27859 size_t sz3 = v3.size();
// Byte sizes of the three host vectors.
27860 size_t typesz =
sizeof(double) * sz;
27861 size_t typesz2 =
sizeof(int) * sz2;
27862 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size; 0 when no params object was passed.
27863 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// The conditions guarding the two warnings below are not visible in this excerpt.
27867 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27874 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector, sized in bytes.
// NOTE(review): buffer2/buffer3 are created CL_MEM_WRITE_ONLY but are filled from host
// data below and never read back in the visible lines; in OpenCL that flag restricts
// kernel access to writes - confirm CL_MEM_READ_ONLY was not intended.
27878 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27880 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
27882 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
27884 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
27885 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
27886 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first listed device, with no properties (NULL).
27887 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; v->at(0) throws on an empty vector,
// whereas &v2[0]/&v3[0] are unchecked.
27889 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27891 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
27893 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per vector length.
27896 size_t size[3] = {sz, sz2, sz3};
27897 size_t work_dimension = 3;
// Use a 1-D range when there is no params object, or when multi_dimensional is unset
// and temp_sz is zero.
// NOTE(review): temp_sz was initialised from buffers_size above while the warnings in
// this chain mention global_work_size - confirm it is reassigned in lines not shown.
27900 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
27901 work_dimension = 1;
27903 else if(temp_sz > 0){
27905 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 27906 For default multidimensional global work size, leave the global_work_size vector empty, \ 27907 and set multi_dimensional to true. Setting the global work size based on the values inside \ 27908 the global_work_size vector.");
// Only the work_dimension assignments of the following branches are visible here.
27912 work_dimension = 1;
27914 else if (temp_sz == 2){
27917 work_dimension = 2;
27924 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with `size` as the global work size and no explicit local size, then wait.
27931 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27933 clWaitForEvents(1, &gpuExec);
// NOTE(review): a second, identical enqueue/wait pair follows; unless a hidden line
// places the two in alternative branches the kernel runs twice and the first event
// handle is overwritten without being released - confirm.
27935 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
27937 clWaitForEvents(1, &gpuExec);
// Host staging area for the contents of buffer 0.
// NOTE(review): the malloc result is used unchecked and no free(result) is visible in
// this excerpt - confirm it is released.
27939 double *result = (
double *) malloc(typesz);
27940 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Overwrite the caller's vector with the sz doubles read back.
27942 v->assign(result, result+sz);
// Release the OpenCL objects created above.
27944 clReleaseCommandQueue (queue);
27945 clReleaseMemObject(buffer);
27946 clReleaseMemObject(buffer2);
27947 clReleaseMemObject(buffer3);
27948 clReleaseEvent(gpuExec);
// Body fragment of an OpenCL dispatch routine; the enclosing signature, braces and
// some statements are not visible in this excerpt.
// Visible flow: v (double) and v2 (int) are accessed through pointers and are both
// overwritten from their device buffers after the kernel ran; v3 (double) is only uploaded.
27953 size_t sz = v->size();
27954 size_t sz2 = v2->size();
27955 size_t sz3 = v3.size();
// Byte sizes of the three host vectors.
27956 size_t typesz =
sizeof(double) * sz;
27957 size_t typesz2 =
sizeof(int) * sz2;
27958 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size; 0 when no params object was passed.
27959 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// The conditions guarding the two warnings below are not visible in this excerpt.
27963 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
27970 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector, sized in bytes.
// NOTE(review): buffer3 is created CL_MEM_WRITE_ONLY but is filled from host data below
// and never read back in the visible lines; in OpenCL that flag restricts kernel
// access to writes - confirm CL_MEM_READ_ONLY was not intended.
27974 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
27976 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
27978 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
27980 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
27981 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
27982 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first listed device, with no properties (NULL).
27983 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; at(0) throws on an empty vector,
// whereas &v3[0] is unchecked.
27985 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
27987 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
27989 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per vector length.
27992 size_t size[3] = {sz, sz2, sz3};
27993 size_t work_dimension = 3;
// Use a 1-D range when there is no params object, or when multi_dimensional is unset
// and temp_sz is zero.
// NOTE(review): temp_sz was initialised from buffers_size above while the warnings in
// this chain mention global_work_size - confirm it is reassigned in lines not shown.
27996 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
27997 work_dimension = 1;
27999 else if(temp_sz > 0){
28001 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 28002 For default multidimensional global work size, leave the global_work_size vector empty, \ 28003 and set multi_dimensional to true. Setting the global work size based on the values inside \ 28004 the global_work_size vector.");
// Only the work_dimension assignments of the following branches are visible here.
28008 work_dimension = 1;
28010 else if (temp_sz == 2){
28013 work_dimension = 2;
28020 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with `size` as the global work size and no explicit local size; gpuExec
// receives the completion event.
28027 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel has finished.
28029 clWaitForEvents(1, &gpuExec);
// Host staging area for the contents of buffer 0.
// NOTE(review): the malloc results below are used unchecked and no free() is visible
// in this excerpt - confirm they are released.
28031 double *result = (
double *) malloc(typesz);
28032 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Overwrite the caller's first vector with the sz doubles read back.
28034 v->assign(result, result+sz);
// When buffer 1 differs from buffer 0 in byte size or element count it gets its own
// int staging area.
28036 if (typesz2 != typesz or sz != sz2){
28038 result2 = (
int *) malloc(typesz2);
28039 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
28041 v2->assign(result2, result2+sz2);
// Presumably the else-branch of the test above (the `else` line is not visible): it
// reuses the double* staging area for buffer 1.
// NOTE(review): int data is read into a double* and assigned element-wise - confirm
// this path is only taken for empty vectors.
28045 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
28047 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
28050 clReleaseCommandQueue (queue);
28051 clReleaseMemObject(buffer);
28052 clReleaseMemObject(buffer2);
28053 clReleaseMemObject(buffer3);
28054 clReleaseEvent(gpuExec);
// Body fragment of an OpenCL dispatch routine; the enclosing signature, braces and
// some statements are not visible in this excerpt.
// Visible flow: v (double), v2 (int) and v3 (double) are accessed through pointers,
// uploaded, processed by `kernel`, and each overwritten from its own device buffer.
28059 size_t sz = v->size();
28060 size_t sz2 = v2->size();
28061 size_t sz3 = v3->size();
// Byte sizes of the three host vectors.
28062 size_t typesz =
sizeof(double) * sz;
28063 size_t typesz2 =
sizeof(int) * sz2;
28064 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size; 0 when no params object was passed.
28065 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// The conditions guarding the two warnings below are not visible in this excerpt.
28069 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28076 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One read-write device buffer per host vector, sized in bytes.
28080 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28082 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
28084 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
28086 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
28087 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
28088 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first listed device, with no properties (NULL).
28089 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; at(0) throws on an empty vector.
28091 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
28093 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
28095 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Default global work size: one dimension per vector length.
28098 size_t size[3] = {sz, sz2, sz3};
28099 size_t work_dimension = 3;
// Use a 1-D range when there is no params object, or when multi_dimensional is unset
// and temp_sz is zero.
// NOTE(review): temp_sz was initialised from buffers_size above while the warnings in
// this chain mention global_work_size - confirm it is reassigned in lines not shown.
28102 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
28103 work_dimension = 1;
28105 else if(temp_sz > 0){
28107 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 28108 For default multidimensional global work size, leave the global_work_size vector empty, \ 28109 and set multi_dimensional to true. Setting the global work size based on the values inside \ 28110 the global_work_size vector.");
// Only the work_dimension assignments of the following branches are visible here.
28114 work_dimension = 1;
28116 else if (temp_sz == 2){
28119 work_dimension = 2;
28126 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with `size` as the global work size and no explicit local size; gpuExec
// receives the completion event.
28133 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel has finished.
28135 clWaitForEvents(1, &gpuExec);
// Host staging area for the contents of buffer 0.
// NOTE(review): the malloc results below are used unchecked and no free() is visible
// in this excerpt - confirm they are released.
28137 double *result = (
double *) malloc(typesz);
28138 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Overwrite the caller's first vector with the sz doubles read back.
28140 v->assign(result, result+sz);
// When buffer 1 differs from buffer 0 in byte size or element count it gets its own
// int staging area.
28142 if (typesz2 != typesz or sz != sz2){
28144 result2 = (
int *) malloc(typesz2);
28145 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
28147 v2->assign(result2, result2+sz2);
// Presumably the else-branch of the test above (the `else` line is not visible): it
// reuses the double* staging area for buffer 1.
28151 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
28153 v2->assign(result, result+sz2);
// Same scheme for buffer 2: a dedicated staging area when its size differs from buffer 0.
28156 if (typesz3 != typesz or sz != sz3){
28158 result3 = (
double *) malloc(typesz3);
28159 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
28161 v3->assign(result3, result3+sz3);
// Equal-size path: reuse `result`, which holds typesz == typesz3 bytes here.
// Fix: read buffer3/typesz3; this line previously read buffer2/typesz2, so v3 was
// filled with the bytes of the int buffer instead of its own results.
28165 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
28167 v3->assign(result, result+sz3);
// Release the OpenCL objects created above.
28170 clReleaseCommandQueue (queue);
28171 clReleaseMemObject(buffer);
28172 clReleaseMemObject(buffer2);
28173 clReleaseMemObject(buffer3);
28174 clReleaseEvent(gpuExec);
// Body fragment of an OpenCL dispatch routine; the enclosing signature, braces and
// some statements are not visible in this excerpt.
// Visible flow: size three device buffers from v (double), v2 (float) and v3 (char),
// upload them, enqueue `kernel`, then copy buffer 0 back into a local vector `res`.
28180 size_t sz = v.size();
28181 size_t sz2 = v2.size();
28182 size_t sz3 = v3.size();
// Byte sizes of the three host vectors.
28183 size_t typesz =
sizeof(double) * sz;
28184 size_t typesz2 =
sizeof(float) * sz2;
28185 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size; 0 when no params object was passed.
28186 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// The conditions guarding the two warnings below are not visible in this excerpt.
28190 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28197 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector, sized in bytes.
// NOTE(review): buffer2/buffer3 are created CL_MEM_WRITE_ONLY but are filled from host
// data below and never read back in the visible lines; in OpenCL that flag restricts
// kernel access to writes - confirm CL_MEM_READ_ONLY was not intended.
28201 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28203 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
28205 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
28207 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
28208 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
28209 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first listed device, with no properties (NULL).
28210 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
// NOTE(review): &v[0] presumes non-empty vectors - confirm callers guarantee this.
28212 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
28214 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
28216 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per vector length.
28219 size_t size[3] = {sz, sz2, sz3};
28220 size_t work_dimension = 3;
// Use a 1-D range when there is no params object, or when multi_dimensional is unset
// and temp_sz is zero.
// NOTE(review): temp_sz was initialised from buffers_size above while the warnings in
// this chain mention global_work_size - confirm it is reassigned in lines not shown.
28223 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
28224 work_dimension = 1;
28226 else if(temp_sz > 0){
28228 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 28229 For default multidimensional global work size, leave the global_work_size vector empty, \ 28230 and set multi_dimensional to true. Setting the global work size based on the values inside \ 28231 the global_work_size vector.");
// Only the work_dimension assignments of the following branches are visible here.
28235 work_dimension = 1;
28237 else if (temp_sz == 2){
28240 work_dimension = 2;
28247 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with `size` as the global work size and no explicit local size; gpuExec
// receives the completion event.
28254 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel has finished.
28256 clWaitForEvents(1, &gpuExec);
// Host staging area for the contents of buffer 0.
// NOTE(review): the malloc result is used unchecked and no free(result) is visible in
// this excerpt - confirm it is released.
28258 double *result = (
double *) malloc(typesz);
28259 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the sz doubles read back into the local result vector.
28261 std::vector<double> res = std::vector<double>();
28262 res.assign(result, result+sz);
// Release the OpenCL objects created above.
28264 clReleaseCommandQueue (queue);
28265 clReleaseMemObject(buffer);
28266 clReleaseMemObject(buffer2);
28267 clReleaseMemObject(buffer3);
28268 clReleaseEvent(gpuExec);
// Body fragment of an OpenCL dispatch routine; the enclosing signature, braces and
// some statements are not visible in this excerpt.
// Visible flow: v is accessed through a pointer and is overwritten with the contents
// of buffer 0 after the kernel ran; v2 (float) and v3 (char) are only uploaded.
28275 size_t sz = v->size();
28276 size_t sz2 = v2.size();
28277 size_t sz3 = v3.size();
// Byte sizes of the three host vectors.
28278 size_t typesz =
sizeof(double) * sz;
28279 size_t typesz2 =
sizeof(float) * sz2;
28280 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size; 0 when no params object was passed.
28281 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// The conditions guarding the two warnings below are not visible in this excerpt.
28285 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28292 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector, sized in bytes.
// NOTE(review): buffer2/buffer3 are created CL_MEM_WRITE_ONLY but are filled from host
// data below and never read back in the visible lines; in OpenCL that flag restricts
// kernel access to writes - confirm CL_MEM_READ_ONLY was not intended.
28296 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28298 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
28300 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
28302 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
28303 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
28304 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first listed device, with no properties (NULL).
28305 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; v->at(0) throws on an empty vector,
// whereas &v2[0]/&v3[0] are unchecked.
28307 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
28309 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
28311 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per vector length.
28314 size_t size[3] = {sz, sz2, sz3};
28315 size_t work_dimension = 3;
// Use a 1-D range when there is no params object, or when multi_dimensional is unset
// and temp_sz is zero.
// NOTE(review): temp_sz was initialised from buffers_size above while the warnings in
// this chain mention global_work_size - confirm it is reassigned in lines not shown.
28318 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
28319 work_dimension = 1;
28321 else if(temp_sz > 0){
28323 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 28324 For default multidimensional global work size, leave the global_work_size vector empty, \ 28325 and set multi_dimensional to true. Setting the global work size based on the values inside \ 28326 the global_work_size vector.");
// Only the work_dimension assignments of the following branches are visible here.
28330 work_dimension = 1;
28332 else if (temp_sz == 2){
28335 work_dimension = 2;
28342 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with `size` as the global work size and no explicit local size, then wait.
28349 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28351 clWaitForEvents(1, &gpuExec);
// NOTE(review): a second, identical enqueue/wait pair follows; unless a hidden line
// places the two in alternative branches the kernel runs twice and the first event
// handle is overwritten without being released - confirm.
28353 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28355 clWaitForEvents(1, &gpuExec);
// Host staging area for the contents of buffer 0.
// NOTE(review): the malloc result is used unchecked and no free(result) is visible in
// this excerpt - confirm it is released.
28357 double *result = (
double *) malloc(typesz);
28358 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Overwrite the caller's vector with the sz doubles read back.
28360 v->assign(result, result+sz);
// Release the OpenCL objects created above.
28362 clReleaseCommandQueue (queue);
28363 clReleaseMemObject(buffer);
28364 clReleaseMemObject(buffer2);
28365 clReleaseMemObject(buffer3);
28366 clReleaseEvent(gpuExec);
// Body fragment of an OpenCL dispatch routine; the enclosing signature, braces and
// some statements are not visible in this excerpt.
// Visible flow: v (double) and v2 (float) are accessed through pointers and are both
// overwritten from their device buffers after the kernel ran; v3 (char) is only uploaded.
28371 size_t sz = v->size();
28372 size_t sz2 = v2->size();
28373 size_t sz3 = v3.size();
// Byte sizes of the three host vectors.
28374 size_t typesz =
sizeof(double) * sz;
28375 size_t typesz2 =
sizeof(float) * sz2;
28376 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size; 0 when no params object was passed.
28377 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// The conditions guarding the two warnings below are not visible in this excerpt.
28381 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28388 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector, sized in bytes.
// NOTE(review): buffer3 is created CL_MEM_WRITE_ONLY but is filled from host data below
// and never read back in the visible lines; in OpenCL that flag restricts kernel
// access to writes - confirm CL_MEM_READ_ONLY was not intended.
28392 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28394 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
28396 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
28398 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
28399 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
28400 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first listed device, with no properties (NULL).
28401 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; at(0) throws on an empty vector,
// whereas &v3[0] is unchecked.
28403 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
28405 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
28407 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per vector length.
28410 size_t size[3] = {sz, sz2, sz3};
28411 size_t work_dimension = 3;
// Use a 1-D range when there is no params object, or when multi_dimensional is unset
// and temp_sz is zero.
// NOTE(review): temp_sz was initialised from buffers_size above while the warnings in
// this chain mention global_work_size - confirm it is reassigned in lines not shown.
28414 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
28415 work_dimension = 1;
28417 else if(temp_sz > 0){
28419 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 28420 For default multidimensional global work size, leave the global_work_size vector empty, \ 28421 and set multi_dimensional to true. Setting the global work size based on the values inside \ 28422 the global_work_size vector.");
// Only the work_dimension assignments of the following branches are visible here.
28426 work_dimension = 1;
28428 else if (temp_sz == 2){
28431 work_dimension = 2;
28438 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with `size` as the global work size and no explicit local size; gpuExec
// receives the completion event.
28445 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel has finished.
28447 clWaitForEvents(1, &gpuExec);
// Host staging area for the contents of buffer 0.
// NOTE(review): the malloc results below are used unchecked and no free() is visible
// in this excerpt - confirm they are released.
28449 double *result = (
double *) malloc(typesz);
28450 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Overwrite the caller's first vector with the sz doubles read back.
28452 v->assign(result, result+sz);
// When buffer 1 differs from buffer 0 in byte size or element count it gets its own
// float staging area.
28454 if (typesz2 != typesz or sz != sz2){
28456 result2 = (
float *) malloc(typesz2);
28457 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
28459 v2->assign(result2, result2+sz2);
// Presumably the else-branch of the test above (the `else` line is not visible): it
// reuses the double* staging area for buffer 1.
// NOTE(review): float data is read into a double* and assigned element-wise - confirm
// this path is only taken for empty vectors.
28463 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
28465 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
28468 clReleaseCommandQueue (queue);
28469 clReleaseMemObject(buffer);
28470 clReleaseMemObject(buffer2);
28471 clReleaseMemObject(buffer3);
28472 clReleaseEvent(gpuExec);
// Body fragment of an OpenCL dispatch routine; the enclosing signature, braces and
// some statements are not visible in this excerpt.
// Visible flow: v (double), v2 (float) and v3 (char) are accessed through pointers,
// uploaded, processed by `kernel`, and each overwritten from its own device buffer.
28477 size_t sz = v->size();
28478 size_t sz2 = v2->size();
28479 size_t sz3 = v3->size();
// Byte sizes of the three host vectors.
28480 size_t typesz =
sizeof(double) * sz;
28481 size_t typesz2 =
sizeof(float) * sz2;
28482 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size; 0 when no params object was passed.
28483 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// The conditions guarding the two warnings below are not visible in this excerpt.
28487 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28494 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One read-write device buffer per host vector, sized in bytes.
28498 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28500 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
28502 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
28504 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
28505 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
28506 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first listed device, with no properties (NULL).
28507 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; at(0) throws on an empty vector.
28509 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
28511 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
28513 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Default global work size: one dimension per vector length.
28516 size_t size[3] = {sz, sz2, sz3};
28517 size_t work_dimension = 3;
// Use a 1-D range when there is no params object, or when multi_dimensional is unset
// and temp_sz is zero.
// NOTE(review): temp_sz was initialised from buffers_size above while the warnings in
// this chain mention global_work_size - confirm it is reassigned in lines not shown.
28520 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
28521 work_dimension = 1;
28523 else if(temp_sz > 0){
28525 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 28526 For default multidimensional global work size, leave the global_work_size vector empty, \ 28527 and set multi_dimensional to true. Setting the global work size based on the values inside \ 28528 the global_work_size vector.");
// Only the work_dimension assignments of the following branches are visible here.
28532 work_dimension = 1;
28534 else if (temp_sz == 2){
28537 work_dimension = 2;
28544 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with `size` as the global work size and no explicit local size; gpuExec
// receives the completion event.
28551 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel has finished.
28553 clWaitForEvents(1, &gpuExec);
// Host staging area for the contents of buffer 0.
// NOTE(review): the malloc results below are used unchecked and no free() is visible
// in this excerpt - confirm they are released.
28555 double *result = (
double *) malloc(typesz);
28556 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Overwrite the caller's first vector with the sz doubles read back.
28558 v->assign(result, result+sz);
// When buffer 1 differs from buffer 0 in byte size or element count it gets its own
// float staging area.
28560 if (typesz2 != typesz or sz != sz2){
28562 result2 = (
float *) malloc(typesz2);
28563 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
28565 v2->assign(result2, result2+sz2);
// Presumably the else-branch of the test above (the `else` line is not visible): it
// reuses the double* staging area for buffer 1.
28569 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
28571 v2->assign(result, result+sz2);
// Same scheme for buffer 2: a dedicated char staging area when its size differs from buffer 0.
28574 if (typesz3 != typesz or sz != sz3){
28576 result3 = (
char *) malloc(typesz3);
28577 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
28579 v3->assign(result3, result3+sz3);
// Equal-size path: reuse `result`, which holds typesz == typesz3 bytes here.
// Fix: read buffer3/typesz3; this line previously read buffer2/typesz2, i.e. the
// wrong device buffer for v3.
28583 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result, 0, NULL, NULL));
28585 v3->assign(result, result+sz3);
// Release the OpenCL objects created above.
28588 clReleaseCommandQueue (queue);
28589 clReleaseMemObject(buffer);
28590 clReleaseMemObject(buffer2);
28591 clReleaseMemObject(buffer3);
28592 clReleaseEvent(gpuExec);
// Body fragment of an OpenCL dispatch routine; the enclosing signature, braces and
// some statements are not visible in this excerpt.
// Visible flow: size three device buffers from v (double), v2 (float) and v3 (int),
// upload them, enqueue `kernel`, then copy buffer 0 back into a local vector `res`.
28598 size_t sz = v.size();
28599 size_t sz2 = v2.size();
28600 size_t sz3 = v3.size();
// Byte sizes of the three host vectors.
28601 size_t typesz =
sizeof(double) * sz;
28602 size_t typesz2 =
sizeof(float) * sz2;
28603 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size; 0 when no params object was passed.
28604 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// The conditions guarding the two warnings below are not visible in this excerpt.
28608 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28615 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector, sized in bytes.
// NOTE(review): buffer2/buffer3 are created CL_MEM_WRITE_ONLY but are filled from host
// data below and never read back in the visible lines; in OpenCL that flag restricts
// kernel access to writes - confirm CL_MEM_READ_ONLY was not intended.
28619 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28621 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
28623 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
28625 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
28626 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
28627 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first listed device, with no properties (NULL).
28628 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data.
// NOTE(review): &v[0] presumes non-empty vectors - confirm callers guarantee this.
28630 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
28632 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
28634 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per vector length.
28637 size_t size[3] = {sz, sz2, sz3};
28638 size_t work_dimension = 3;
// Use a 1-D range when there is no params object, or when multi_dimensional is unset
// and temp_sz is zero.
// NOTE(review): temp_sz was initialised from buffers_size above while the warnings in
// this chain mention global_work_size - confirm it is reassigned in lines not shown.
28641 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
28642 work_dimension = 1;
28644 else if(temp_sz > 0){
28646 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 28647 For default multidimensional global work size, leave the global_work_size vector empty, \ 28648 and set multi_dimensional to true. Setting the global work size based on the values inside \ 28649 the global_work_size vector.");
// Only the work_dimension assignments of the following branches are visible here.
28653 work_dimension = 1;
28655 else if (temp_sz == 2){
28658 work_dimension = 2;
28665 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with `size` as the global work size and no explicit local size; gpuExec
// receives the completion event.
28672 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel has finished.
28674 clWaitForEvents(1, &gpuExec);
// Host staging area for the contents of buffer 0.
// NOTE(review): the malloc result is used unchecked and no free(result) is visible in
// this excerpt - confirm it is released.
28676 double *result = (
double *) malloc(typesz);
28677 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Copy the sz doubles read back into the local result vector.
28679 std::vector<double> res = std::vector<double>();
28680 res.assign(result, result+sz);
// Release the OpenCL objects created above.
28682 clReleaseCommandQueue (queue);
28683 clReleaseMemObject(buffer);
28684 clReleaseMemObject(buffer2);
28685 clReleaseMemObject(buffer3);
28686 clReleaseEvent(gpuExec);
// Body fragment of an OpenCL dispatch routine; the enclosing signature, braces and
// some statements are not visible in this excerpt.
// Visible flow: v is accessed through a pointer and is overwritten with the contents
// of buffer 0 after the kernel ran; v2 (float) and v3 (int) are only uploaded.
28693 size_t sz = v->size();
28694 size_t sz2 = v2.size();
28695 size_t sz3 = v3.size();
// Byte sizes of the three host vectors.
28696 size_t typesz =
sizeof(double) * sz;
28697 size_t typesz2 =
sizeof(float) * sz2;
28698 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size; 0 when no params object was passed.
28699 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// The conditions guarding the two warnings below are not visible in this excerpt.
28703 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28710 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector, sized in bytes.
// NOTE(review): buffer2/buffer3 are created CL_MEM_WRITE_ONLY but are filled from host
// data below and never read back in the visible lines; in OpenCL that flag restricts
// kernel access to writes - confirm CL_MEM_READ_ONLY was not intended.
28714 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28716 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
28718 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
28720 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
28721 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
28722 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first listed device, with no properties (NULL).
28723 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; v->at(0) throws on an empty vector,
// whereas &v2[0]/&v3[0] are unchecked.
28725 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
28727 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
28729 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per vector length.
28732 size_t size[3] = {sz, sz2, sz3};
28733 size_t work_dimension = 3;
// Use a 1-D range when there is no params object, or when multi_dimensional is unset
// and temp_sz is zero.
// NOTE(review): temp_sz was initialised from buffers_size above while the warnings in
// this chain mention global_work_size - confirm it is reassigned in lines not shown.
28736 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
28737 work_dimension = 1;
28739 else if(temp_sz > 0){
28741 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 28742 For default multidimensional global work size, leave the global_work_size vector empty, \ 28743 and set multi_dimensional to true. Setting the global work size based on the values inside \ 28744 the global_work_size vector.");
// Only the work_dimension assignments of the following branches are visible here.
28748 work_dimension = 1;
28750 else if (temp_sz == 2){
28753 work_dimension = 2;
28760 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with `size` as the global work size and no explicit local size, then wait.
28767 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28769 clWaitForEvents(1, &gpuExec);
// NOTE(review): a second, identical enqueue/wait pair follows; unless a hidden line
// places the two in alternative branches the kernel runs twice and the first event
// handle is overwritten without being released - confirm.
28771 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28773 clWaitForEvents(1, &gpuExec);
// Host staging area for the contents of buffer 0.
// NOTE(review): the malloc result is used unchecked and no free(result) is visible in
// this excerpt - confirm it is released.
28775 double *result = (
double *) malloc(typesz);
28776 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Overwrite the caller's vector with the sz doubles read back.
28778 v->assign(result, result+sz);
// Release the OpenCL objects created above.
28780 clReleaseCommandQueue (queue);
28781 clReleaseMemObject(buffer);
28782 clReleaseMemObject(buffer2);
28783 clReleaseMemObject(buffer3);
28784 clReleaseEvent(gpuExec);
// Body fragment of an OpenCL dispatch routine; the enclosing signature, braces and
// some statements are not visible in this excerpt.
// Visible flow: v (double) and v2 (float) are accessed through pointers and are both
// overwritten from their device buffers after the kernel ran; v3 (int) is only uploaded.
28789 size_t sz = v->size();
28790 size_t sz2 = v2->size();
28791 size_t sz3 = v3.size();
// Byte sizes of the three host vectors.
28792 size_t typesz =
sizeof(double) * sz;
28793 size_t typesz2 =
sizeof(float) * sz2;
28794 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size; 0 when no params object was passed.
28795 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// The conditions guarding the two warnings below are not visible in this excerpt.
28799 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28806 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector, sized in bytes.
// NOTE(review): buffer3 is created CL_MEM_WRITE_ONLY but is filled from host data below
// and never read back in the visible lines; in OpenCL that flag restricts kernel
// access to writes - confirm CL_MEM_READ_ONLY was not intended.
28810 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28812 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
28814 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers to kernel arguments 0, 1 and 2.
28816 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
28817 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
28818 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first listed device, with no properties (NULL).
28819 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; at(0) throws on an empty vector,
// whereas &v3[0] is unchecked.
28821 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
28823 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
28825 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Default global work size: one dimension per vector length.
28828 size_t size[3] = {sz, sz2, sz3};
28829 size_t work_dimension = 3;
// Use a 1-D range when there is no params object, or when multi_dimensional is unset
// and temp_sz is zero.
// NOTE(review): temp_sz was initialised from buffers_size above while the warnings in
// this chain mention global_work_size - confirm it is reassigned in lines not shown.
28832 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
28833 work_dimension = 1;
28835 else if(temp_sz > 0){
28837 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 28838 For default multidimensional global work size, leave the global_work_size vector empty, \ 28839 and set multi_dimensional to true. Setting the global work size based on the values inside \ 28840 the global_work_size vector.");
// Only the work_dimension assignments of the following branches are visible here.
28844 work_dimension = 1;
28846 else if (temp_sz == 2){
28849 work_dimension = 2;
28856 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with `size` as the global work size and no explicit local size; gpuExec
// receives the completion event.
28863 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
// Block until the kernel has finished.
28865 clWaitForEvents(1, &gpuExec);
// Host staging area for the contents of buffer 0.
// NOTE(review): the malloc results below are used unchecked and no free() is visible
// in this excerpt - confirm they are released.
28867 double *result = (
double *) malloc(typesz);
28868 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
// Overwrite the caller's first vector with the sz doubles read back.
28870 v->assign(result, result+sz);
// When buffer 1 differs from buffer 0 in byte size or element count it gets its own
// float staging area.
28872 if (typesz2 != typesz or sz != sz2){
28874 result2 = (
float *) malloc(typesz2);
28875 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
28877 v2->assign(result2, result2+sz2);
// Presumably the else-branch of the test above (the `else` line is not visible): it
// reuses the double* staging area for buffer 1.
// NOTE(review): float data is read into a double* and assigned element-wise - confirm
// this path is only taken for empty vectors.
28881 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
28883 v2->assign(result, result+sz2);
// Release the OpenCL objects created above.
28886 clReleaseCommandQueue (queue);
28887 clReleaseMemObject(buffer);
28888 clReleaseMemObject(buffer2);
28889 clReleaseMemObject(buffer3);
28890 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL kernel-launch routine; its signature and several
// intermediate lines (conditions, braces) are not visible in this listing.
// Visible flow: size three host vectors, create one device buffer per vector,
// bind the buffers as kernel arguments 0-2, upload the host data, run the
// kernel, read all three buffers back into v, v2 and v3, release OpenCL objects.
// Element counts of the three host vectors (all accessed through pointers).
28895 size_t sz = v->size();
28896 size_t sz2 = v2->size();
28897 size_t sz3 = v3->size();
// Byte sizes, computed with double / float / int element widths respectively.
28898 size_t typesz =
sizeof(double) * sz;
28899 size_t typesz2 =
sizeof(float) * sz2;
28900 size_t typesz3 =
sizeof(int) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
28901 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the number of custom buffer sizes; presumably guarded by
// checks on temp_sz that are not visible in this listing.
28905 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
28912 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector. Each call overwrites `error`; no check
// of `error` is visible in this listing.
28916 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
28918 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
28920 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are ignored).
28922 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
28923 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
28924 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties (NULL).
28925 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; return codes are ignored.
// at(0) throws std::out_of_range when the vector is empty.
28927 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
28929 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
28931 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Global work size per dimension defaults to the three element counts.
28934 size_t size[3] = {sz, sz2, sz3};
28935 size_t work_dimension = 3;
// 1-D launch when there are no params, or when multi_dimensional is false and temp_sz is 0.
// NOTE(review): the warnings below talk about global_work_size, so temp_sz was
// presumably reassigned in lines not shown here - confirm.
28938 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
28939 work_dimension = 1;
28941 else if(temp_sz > 0){
28943 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 28944 For default multidimensional global work size, leave the global_work_size vector empty, \ 28945 and set multi_dimensional to true. Setting the global work size based on the values inside \ 28946 the global_work_size vector.");
28950 work_dimension = 1;
28952 else if (temp_sz == 2){
28955 work_dimension = 2;
28962 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with no explicit local work size (NULL) and wait on the kernel's event.
28969 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
28971 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the first vector.
// NOTE(review): no NULL check or free() for this allocation is visible here.
28973 double *result = (
double *) malloc(typesz);
// Blocking read-back of buffer, then copy sz doubles into *v.
28974 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
28976 v->assign(result, result+sz);
// v2 read-back: a dedicated float staging buffer is used when v2's byte size
// or element count differs from v's.
28978 if (typesz2 != typesz or sz != sz2){
28980 result2 = (
float *) malloc(typesz2);
28981 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
28983 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the else path - buffer2 is read into the double
// staging buffer and v2 is then filled from it as doubles; confirm.
28987 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
28989 v2->assign(result, result+sz2);
// v3 read-back, same pattern with an int staging buffer.
28992 if (typesz3 != typesz or sz != sz3){
28994 result3 = (
int *) malloc(typesz3);
28995 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
28997 v3->assign(result3, result3+sz3);
// NOTE(review): this read uses buffer2/typesz2 although v3 is being filled;
// looks like a copy-paste slip for buffer3/typesz3 - confirm.
29001 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
29003 v3->assign(result, result+sz3);
// Release the queue, the three device buffers and the kernel event.
29006 clReleaseCommandQueue (queue);
29007 clReleaseMemObject(buffer);
29008 clReleaseMemObject(buffer2);
29009 clReleaseMemObject(buffer3);
29010 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL kernel-launch routine; its signature and several
// intermediate lines (conditions, braces) are not visible in this listing.
// Visible flow: size three host vectors, create one device buffer per vector,
// bind the buffers as kernel arguments 0-2, upload the host data, run the
// kernel, read the first buffer back into a local vector `res`, release objects.
// Element counts of the three host vectors (all accessed with `.`).
29016 size_t sz = v.size();
29017 size_t sz2 = v2.size();
29018 size_t sz3 = v3.size();
// Byte sizes, computed with double / float / float element widths respectively.
29019 size_t typesz =
sizeof(double) * sz;
29020 size_t typesz2 =
sizeof(float) * sz2;
29021 size_t typesz3 =
sizeof(float) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
29022 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the number of custom buffer sizes; presumably guarded by
// checks on temp_sz that are not visible in this listing.
29026 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29033 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector. Each call overwrites `error`; no check
// of `error` is visible in this listing.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY yet receive host input
// below, so the kernel presumably reads them; confirm the intended flag.
29037 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29039 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
29041 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are ignored).
29043 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
29044 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
29045 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties (NULL).
29046 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; return codes are ignored.
// NOTE(review): &x[0] is taken without a visible emptiness check - confirm callers never pass empty vectors.
29048 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
29050 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
29052 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension defaults to the three element counts.
29055 size_t size[3] = {sz, sz2, sz3};
29056 size_t work_dimension = 3;
// 1-D launch when there are no params, or when multi_dimensional is false and temp_sz is 0.
// NOTE(review): the warnings below talk about global_work_size, so temp_sz was
// presumably reassigned in lines not shown here - confirm.
29059 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
29060 work_dimension = 1;
29062 else if(temp_sz > 0){
29064 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 29065 For default multidimensional global work size, leave the global_work_size vector empty, \ 29066 and set multi_dimensional to true. Setting the global work size based on the values inside \ 29067 the global_work_size vector.");
29071 work_dimension = 1;
29073 else if (temp_sz == 2){
29076 work_dimension = 2;
29083 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with no explicit local work size (NULL) and wait on the kernel's event.
29090 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29092 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the first vector.
// NOTE(review): no NULL check or free() for this allocation is visible here.
29094 double *result = (
double *) malloc(typesz);
// Blocking read-back of buffer, copied into the local vector `res`
// (presumably returned by lines not shown here).
29095 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29097 std::vector<double> res = std::vector<double>();
29098 res.assign(result, result+sz);
// Release the queue, the three device buffers and the kernel event.
29100 clReleaseCommandQueue (queue);
29101 clReleaseMemObject(buffer);
29102 clReleaseMemObject(buffer2);
29103 clReleaseMemObject(buffer3);
29104 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL kernel-launch routine; its signature and several
// intermediate lines (conditions, braces) are not visible in this listing.
// Visible flow: size three host vectors, create one device buffer per vector,
// bind the buffers as kernel arguments 0-2, upload the host data, run the
// kernel, read the first buffer back into *v, release OpenCL objects.
// Element counts: v is accessed through a pointer, v2 and v3 with `.`.
29111 size_t sz = v->size();
29112 size_t sz2 = v2.size();
29113 size_t sz3 = v3.size();
// Byte sizes, computed with double / float / float element widths respectively.
29114 size_t typesz =
sizeof(double) * sz;
29115 size_t typesz2 =
sizeof(float) * sz2;
29116 size_t typesz3 =
sizeof(float) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
29117 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the number of custom buffer sizes; presumably guarded by
// checks on temp_sz that are not visible in this listing.
29121 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29128 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector. Each call overwrites `error`; no check
// of `error` is visible in this listing.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY yet receive host input
// below, so the kernel presumably reads them; confirm the intended flag.
29132 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29134 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
29136 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are ignored).
29138 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
29139 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
29140 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties (NULL).
29141 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; return codes are ignored.
// at(0) throws std::out_of_range when *v is empty.
29143 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
29145 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
29147 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension defaults to the three element counts.
29150 size_t size[3] = {sz, sz2, sz3};
29151 size_t work_dimension = 3;
// 1-D launch when there are no params, or when multi_dimensional is false and temp_sz is 0.
// NOTE(review): the warnings below talk about global_work_size, so temp_sz was
// presumably reassigned in lines not shown here - confirm.
29154 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
29155 work_dimension = 1;
29157 else if(temp_sz > 0){
29159 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 29160 For default multidimensional global work size, leave the global_work_size vector empty, \ 29161 and set multi_dimensional to true. Setting the global work size based on the values inside \ 29162 the global_work_size vector.");
29166 work_dimension = 1;
29168 else if (temp_sz == 2){
29171 work_dimension = 2;
29178 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with no explicit local work size (NULL) and wait on the kernel's event.
29185 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29187 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and waited on a second time with the
// same arguments, and gpuExec is overwritten with no visible release of the
// first event - confirm whether the second launch is intended.
29189 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29191 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the first vector.
// NOTE(review): no NULL check or free() for this allocation is visible here.
29193 double *result = (
double *) malloc(typesz);
// Blocking read-back of buffer, then copy sz doubles into *v.
29194 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29196 v->assign(result, result+sz);
// Release the queue, the three device buffers and the kernel event.
29198 clReleaseCommandQueue (queue);
29199 clReleaseMemObject(buffer);
29200 clReleaseMemObject(buffer2);
29201 clReleaseMemObject(buffer3);
29202 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL kernel-launch routine; its signature and several
// intermediate lines (conditions, braces) are not visible in this listing.
// Visible flow: size three host vectors, create one device buffer per vector,
// bind the buffers as kernel arguments 0-2, upload the host data, run the
// kernel, read the first two buffers back into *v and *v2, release objects.
// Element counts: v and v2 are accessed through pointers, v3 with `.`.
29207 size_t sz = v->size();
29208 size_t sz2 = v2->size();
29209 size_t sz3 = v3.size();
// Byte sizes, computed with double / float / float element widths respectively.
29210 size_t typesz =
sizeof(double) * sz;
29211 size_t typesz2 =
sizeof(float) * sz2;
29212 size_t typesz3 =
sizeof(float) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
29213 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the number of custom buffer sizes; presumably guarded by
// checks on temp_sz that are not visible in this listing.
29217 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29224 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector. Each call overwrites `error`; no check
// of `error` is visible in this listing.
// NOTE(review): buffer3 is CL_MEM_WRITE_ONLY yet receives host input below,
// so the kernel presumably reads it; confirm the intended flag.
29228 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29230 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
29232 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are ignored).
29234 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
29235 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
29236 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties (NULL).
29237 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; return codes are ignored.
// at(0) throws std::out_of_range when the vector is empty.
29239 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
29241 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
29243 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension defaults to the three element counts.
29246 size_t size[3] = {sz, sz2, sz3};
29247 size_t work_dimension = 3;
// 1-D launch when there are no params, or when multi_dimensional is false and temp_sz is 0.
// NOTE(review): the warnings below talk about global_work_size, so temp_sz was
// presumably reassigned in lines not shown here - confirm.
29250 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
29251 work_dimension = 1;
29253 else if(temp_sz > 0){
29255 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 29256 For default multidimensional global work size, leave the global_work_size vector empty, \ 29257 and set multi_dimensional to true. Setting the global work size based on the values inside \ 29258 the global_work_size vector.");
29262 work_dimension = 1;
29264 else if (temp_sz == 2){
29267 work_dimension = 2;
29274 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with no explicit local work size (NULL) and wait on the kernel's event.
29281 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29283 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the first vector.
// NOTE(review): no NULL check or free() for this allocation is visible here.
29285 double *result = (
double *) malloc(typesz);
// Blocking read-back of buffer, then copy sz doubles into *v.
29286 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29288 v->assign(result, result+sz);
// v2 read-back: a dedicated float staging buffer is used when v2's byte size
// or element count differs from v's.
29290 if (typesz2 != typesz or sz != sz2){
29292 result2 = (
float *) malloc(typesz2);
29293 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
29295 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the else path - buffer2 is read into the double
// staging buffer and v2 is then filled from it as doubles; confirm.
29299 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
29301 v2->assign(result, result+sz2);
// Release the queue, the three device buffers and the kernel event.
29304 clReleaseCommandQueue (queue);
29305 clReleaseMemObject(buffer);
29306 clReleaseMemObject(buffer2);
29307 clReleaseMemObject(buffer3);
29308 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL kernel-launch routine; its signature and several
// intermediate lines (conditions, braces) are not visible in this listing.
// Visible flow: size three host vectors, create one device buffer per vector,
// bind the buffers as kernel arguments 0-2, upload the host data, run the
// kernel, read all three buffers back into v, v2 and v3, release OpenCL objects.
// Element counts of the three host vectors (all accessed through pointers).
29313 size_t sz = v->size();
29314 size_t sz2 = v2->size();
29315 size_t sz3 = v3->size();
// Byte sizes, computed with double / float / float element widths respectively.
29316 size_t typesz =
sizeof(double) * sz;
29317 size_t typesz2 =
sizeof(float) * sz2;
29318 size_t typesz3 =
sizeof(float) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
29319 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the number of custom buffer sizes; presumably guarded by
// checks on temp_sz that are not visible in this listing.
29323 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29330 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector. Each call overwrites `error`; no check
// of `error` is visible in this listing.
29334 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29336 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
29338 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are ignored).
29340 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
29341 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
29342 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties (NULL).
29343 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; return codes are ignored.
// at(0) throws std::out_of_range when the vector is empty.
29345 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
29347 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
29349 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Global work size per dimension defaults to the three element counts.
29352 size_t size[3] = {sz, sz2, sz3};
29353 size_t work_dimension = 3;
// 1-D launch when there are no params, or when multi_dimensional is false and temp_sz is 0.
// NOTE(review): the warnings below talk about global_work_size, so temp_sz was
// presumably reassigned in lines not shown here - confirm.
29356 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
29357 work_dimension = 1;
29359 else if(temp_sz > 0){
29361 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 29362 For default multidimensional global work size, leave the global_work_size vector empty, \ 29363 and set multi_dimensional to true. Setting the global work size based on the values inside \ 29364 the global_work_size vector.");
29368 work_dimension = 1;
29370 else if (temp_sz == 2){
29373 work_dimension = 2;
29380 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with no explicit local work size (NULL) and wait on the kernel's event.
29387 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29389 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the first vector.
// NOTE(review): no NULL check or free() for this allocation is visible here.
29391 double *result = (
double *) malloc(typesz);
// Blocking read-back of buffer, then copy sz doubles into *v.
29392 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29394 v->assign(result, result+sz);
// v2 read-back: a dedicated float staging buffer is used when v2's byte size
// or element count differs from v's.
29396 if (typesz2 != typesz or sz != sz2){
29398 result2 = (
float *) malloc(typesz2);
29399 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
29401 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the else path - buffer2 is read into the double
// staging buffer and v2 is then filled from it as doubles; confirm.
29405 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
29407 v2->assign(result, result+sz2);
// v3 read-back, same pattern with a float staging buffer.
29410 if (typesz3 != typesz or sz != sz3){
29412 result3 = (
float *) malloc(typesz3);
29413 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
29415 v3->assign(result3, result3+sz3);
// NOTE(review): this read uses buffer2/typesz2 although v3 is being filled;
// looks like a copy-paste slip for buffer3/typesz3 - confirm.
29419 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
29421 v3->assign(result, result+sz3);
// Release the queue, the three device buffers and the kernel event.
29424 clReleaseCommandQueue (queue);
29425 clReleaseMemObject(buffer);
29426 clReleaseMemObject(buffer2);
29427 clReleaseMemObject(buffer3);
29428 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL kernel-launch routine; its signature and several
// intermediate lines (conditions, braces) are not visible in this listing.
// Visible flow: size three host vectors, create one device buffer per vector,
// bind the buffers as kernel arguments 0-2, upload the host data, run the
// kernel, read the first buffer back into a local vector `res`, release objects.
// Element counts of the three host vectors (all accessed with `.`).
29434 size_t sz = v.size();
29435 size_t sz2 = v2.size();
29436 size_t sz3 = v3.size();
// Byte sizes, computed with double / float / double element widths respectively.
29437 size_t typesz =
sizeof(double) * sz;
29438 size_t typesz2 =
sizeof(float) * sz2;
29439 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
29440 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the number of custom buffer sizes; presumably guarded by
// checks on temp_sz that are not visible in this listing.
29444 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29451 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector. Each call overwrites `error`; no check
// of `error` is visible in this listing.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY yet receive host input
// below, so the kernel presumably reads them; confirm the intended flag.
29455 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29457 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
29459 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are ignored).
29461 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
29462 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
29463 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties (NULL).
29464 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; return codes are ignored.
// NOTE(review): &x[0] is taken without a visible emptiness check - confirm callers never pass empty vectors.
29466 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
29468 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
29470 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension defaults to the three element counts.
29473 size_t size[3] = {sz, sz2, sz3};
29474 size_t work_dimension = 3;
// 1-D launch when there are no params, or when multi_dimensional is false and temp_sz is 0.
// NOTE(review): the warnings below talk about global_work_size, so temp_sz was
// presumably reassigned in lines not shown here - confirm.
29477 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
29478 work_dimension = 1;
29480 else if(temp_sz > 0){
29482 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 29483 For default multidimensional global work size, leave the global_work_size vector empty, \ 29484 and set multi_dimensional to true. Setting the global work size based on the values inside \ 29485 the global_work_size vector.");
29489 work_dimension = 1;
29491 else if (temp_sz == 2){
29494 work_dimension = 2;
29501 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with no explicit local work size (NULL) and wait on the kernel's event.
29508 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29510 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the first vector.
// NOTE(review): no NULL check or free() for this allocation is visible here.
29512 double *result = (
double *) malloc(typesz);
// Blocking read-back of buffer, copied into the local vector `res`
// (presumably returned by lines not shown here).
29513 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29515 std::vector<double> res = std::vector<double>();
29516 res.assign(result, result+sz);
// Release the queue, the three device buffers and the kernel event.
29518 clReleaseCommandQueue (queue);
29519 clReleaseMemObject(buffer);
29520 clReleaseMemObject(buffer2);
29521 clReleaseMemObject(buffer3);
29522 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL kernel-launch routine; its signature and several
// intermediate lines (conditions, braces) are not visible in this listing.
// Visible flow: size three host vectors, create one device buffer per vector,
// bind the buffers as kernel arguments 0-2, upload the host data, run the
// kernel, read the first buffer back into *v, release OpenCL objects.
// Element counts: v is accessed through a pointer, v2 and v3 with `.`.
29529 size_t sz = v->size();
29530 size_t sz2 = v2.size();
29531 size_t sz3 = v3.size();
// Byte sizes, computed with double / float / double element widths respectively.
29532 size_t typesz =
sizeof(double) * sz;
29533 size_t typesz2 =
sizeof(float) * sz2;
29534 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
29535 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the number of custom buffer sizes; presumably guarded by
// checks on temp_sz that are not visible in this listing.
29539 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29546 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector. Each call overwrites `error`; no check
// of `error` is visible in this listing.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY yet receive host input
// below, so the kernel presumably reads them; confirm the intended flag.
29550 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29552 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
29554 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are ignored).
29556 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
29557 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
29558 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties (NULL).
29559 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; return codes are ignored.
// at(0) throws std::out_of_range when *v is empty.
29561 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
29563 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
29565 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension defaults to the three element counts.
29568 size_t size[3] = {sz, sz2, sz3};
29569 size_t work_dimension = 3;
// 1-D launch when there are no params, or when multi_dimensional is false and temp_sz is 0.
// NOTE(review): the warnings below talk about global_work_size, so temp_sz was
// presumably reassigned in lines not shown here - confirm.
29572 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
29573 work_dimension = 1;
29575 else if(temp_sz > 0){
29577 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 29578 For default multidimensional global work size, leave the global_work_size vector empty, \ 29579 and set multi_dimensional to true. Setting the global work size based on the values inside \ 29580 the global_work_size vector.");
29584 work_dimension = 1;
29586 else if (temp_sz == 2){
29589 work_dimension = 2;
29596 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with no explicit local work size (NULL) and wait on the kernel's event.
29603 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29605 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and waited on a second time with the
// same arguments, and gpuExec is overwritten with no visible release of the
// first event - confirm whether the second launch is intended.
29607 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29609 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the first vector.
// NOTE(review): no NULL check or free() for this allocation is visible here.
29611 double *result = (
double *) malloc(typesz);
// Blocking read-back of buffer, then copy sz doubles into *v.
29612 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29614 v->assign(result, result+sz);
// Release the queue, the three device buffers and the kernel event.
29616 clReleaseCommandQueue (queue);
29617 clReleaseMemObject(buffer);
29618 clReleaseMemObject(buffer2);
29619 clReleaseMemObject(buffer3);
29620 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL kernel-launch routine; its signature and several
// intermediate lines (conditions, braces) are not visible in this listing.
// Visible flow: size three host vectors, create one device buffer per vector,
// bind the buffers as kernel arguments 0-2, upload the host data, run the
// kernel, read the first two buffers back into *v and *v2, release objects.
// Element counts: v and v2 are accessed through pointers, v3 with `.`.
29625 size_t sz = v->size();
29626 size_t sz2 = v2->size();
29627 size_t sz3 = v3.size();
// Byte sizes, computed with double / float / double element widths respectively.
29628 size_t typesz =
sizeof(double) * sz;
29629 size_t typesz2 =
sizeof(float) * sz2;
29630 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
29631 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the number of custom buffer sizes; presumably guarded by
// checks on temp_sz that are not visible in this listing.
29635 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29642 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector. Each call overwrites `error`; no check
// of `error` is visible in this listing.
// NOTE(review): buffer3 is CL_MEM_WRITE_ONLY yet receives host input below,
// so the kernel presumably reads it; confirm the intended flag.
29646 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29648 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
29650 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are ignored).
29652 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
29653 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
29654 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties (NULL).
29655 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; return codes are ignored.
// at(0) throws std::out_of_range when the vector is empty.
29657 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
29659 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
29661 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension defaults to the three element counts.
29664 size_t size[3] = {sz, sz2, sz3};
29665 size_t work_dimension = 3;
// 1-D launch when there are no params, or when multi_dimensional is false and temp_sz is 0.
// NOTE(review): the warnings below talk about global_work_size, so temp_sz was
// presumably reassigned in lines not shown here - confirm.
29668 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
29669 work_dimension = 1;
29671 else if(temp_sz > 0){
29673 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 29674 For default multidimensional global work size, leave the global_work_size vector empty, \ 29675 and set multi_dimensional to true. Setting the global work size based on the values inside \ 29676 the global_work_size vector.");
29680 work_dimension = 1;
29682 else if (temp_sz == 2){
29685 work_dimension = 2;
29692 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with no explicit local work size (NULL) and wait on the kernel's event.
29699 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29701 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the first vector.
// NOTE(review): no NULL check or free() for this allocation is visible here.
29703 double *result = (
double *) malloc(typesz);
// Blocking read-back of buffer, then copy sz doubles into *v.
29704 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29706 v->assign(result, result+sz);
// v2 read-back: a dedicated float staging buffer is used when v2's byte size
// or element count differs from v's.
29708 if (typesz2 != typesz or sz != sz2){
29710 result2 = (
float *) malloc(typesz2);
29711 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
29713 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the else path - buffer2 is read into the double
// staging buffer and v2 is then filled from it as doubles; confirm.
29717 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
29719 v2->assign(result, result+sz2);
// Release the queue, the three device buffers and the kernel event.
29722 clReleaseCommandQueue (queue);
29723 clReleaseMemObject(buffer);
29724 clReleaseMemObject(buffer2);
29725 clReleaseMemObject(buffer3);
29726 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL kernel-launch routine; its signature and several
// intermediate lines (conditions, braces) are not visible in this listing.
// Visible flow: size three host vectors, create one device buffer per vector,
// bind the buffers as kernel arguments 0-2, upload the host data, run the
// kernel, read all three buffers back into v, v2 and v3, release OpenCL objects.
// Element counts of the three host vectors (all accessed through pointers).
29731 size_t sz = v->size();
29732 size_t sz2 = v2->size();
29733 size_t sz3 = v3->size();
// Byte sizes, computed with double / float / double element widths respectively.
29734 size_t typesz =
sizeof(double) * sz;
29735 size_t typesz2 =
sizeof(float) * sz2;
29736 size_t typesz3 =
sizeof(double) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
29737 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the number of custom buffer sizes; presumably guarded by
// checks on temp_sz that are not visible in this listing.
29741 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29748 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector. Each call overwrites `error`; no check
// of `error` is visible in this listing.
29752 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29754 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
29756 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are ignored).
29758 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
29759 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
29760 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties (NULL).
29761 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; return codes are ignored.
// at(0) throws std::out_of_range when the vector is empty.
29763 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
29765 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
29767 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
// Global work size per dimension defaults to the three element counts.
29770 size_t size[3] = {sz, sz2, sz3};
29771 size_t work_dimension = 3;
// 1-D launch when there are no params, or when multi_dimensional is false and temp_sz is 0.
// NOTE(review): the warnings below talk about global_work_size, so temp_sz was
// presumably reassigned in lines not shown here - confirm.
29774 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
29775 work_dimension = 1;
29777 else if(temp_sz > 0){
29779 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 29780 For default multidimensional global work size, leave the global_work_size vector empty, \ 29781 and set multi_dimensional to true. Setting the global work size based on the values inside \ 29782 the global_work_size vector.");
29786 work_dimension = 1;
29788 else if (temp_sz == 2){
29791 work_dimension = 2;
29798 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with no explicit local work size (NULL) and wait on the kernel's event.
29805 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29807 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the first vector.
// NOTE(review): no NULL check or free() for this allocation is visible here.
29809 double *result = (
double *) malloc(typesz);
// Blocking read-back of buffer, then copy sz doubles into *v.
29810 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29812 v->assign(result, result+sz);
// v2 read-back: a dedicated float staging buffer is used when v2's byte size
// or element count differs from v's.
29814 if (typesz2 != typesz or sz != sz2){
29816 result2 = (
float *) malloc(typesz2);
29817 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
29819 v2->assign(result2, result2+sz2);
// NOTE(review): presumably the else path - buffer2 is read into the double
// staging buffer and v2 is then filled from it as doubles; confirm.
29823 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
29825 v2->assign(result, result+sz2);
// v3 read-back, same pattern with a double staging buffer.
29828 if (typesz3 != typesz or sz != sz3){
29830 result3 = (
double *) malloc(typesz3);
29831 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
29833 v3->assign(result3, result3+sz3);
// NOTE(review): this read uses buffer2/typesz2 although v3 is being filled;
// looks like a copy-paste slip for buffer3/typesz3. Since v and v3 share an
// element type here, this path presumably runs whenever their sizes match - confirm.
29837 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
29839 v3->assign(result, result+sz3);
// Release the queue, the three device buffers and the kernel event.
29842 clReleaseCommandQueue (queue);
29843 clReleaseMemObject(buffer);
29844 clReleaseMemObject(buffer2);
29845 clReleaseMemObject(buffer3);
29846 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL kernel-launch routine; its signature and several
// intermediate lines (conditions, braces) are not visible in this listing.
// Visible flow: size three host vectors, create one device buffer per vector,
// bind the buffers as kernel arguments 0-2, upload the host data, run the
// kernel, read the first buffer back into a local vector `res`, release objects.
// Element counts of the three host vectors (all accessed with `.`).
29852 size_t sz = v.size();
29853 size_t sz2 = v2.size();
29854 size_t sz3 = v3.size();
// Byte sizes, computed with double / double / char element widths respectively.
29855 size_t typesz =
sizeof(double) * sz;
29856 size_t typesz2 =
sizeof(double) * sz2;
29857 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
29858 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the number of custom buffer sizes; presumably guarded by
// checks on temp_sz that are not visible in this listing.
29862 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29869 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector. Each call overwrites `error`; no check
// of `error` is visible in this listing.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY yet receive host input
// below, so the kernel presumably reads them; confirm the intended flag.
29873 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29875 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
29877 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are ignored).
29879 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
29880 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
29881 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties (NULL).
29882 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; return codes are ignored.
// NOTE(review): &x[0] is taken without a visible emptiness check - confirm callers never pass empty vectors.
29884 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
29886 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
29888 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension defaults to the three element counts.
29891 size_t size[3] = {sz, sz2, sz3};
29892 size_t work_dimension = 3;
// 1-D launch when there are no params, or when multi_dimensional is false and temp_sz is 0.
// NOTE(review): the warnings below talk about global_work_size, so temp_sz was
// presumably reassigned in lines not shown here - confirm.
29895 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
29896 work_dimension = 1;
29898 else if(temp_sz > 0){
29900 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 29901 For default multidimensional global work size, leave the global_work_size vector empty, \ 29902 and set multi_dimensional to true. Setting the global work size based on the values inside \ 29903 the global_work_size vector.");
29907 work_dimension = 1;
29909 else if (temp_sz == 2){
29912 work_dimension = 2;
29919 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with no explicit local work size (NULL) and wait on the kernel's event.
29926 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
29928 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the first vector.
// NOTE(review): no NULL check or free() for this allocation is visible here.
29930 double *result = (
double *) malloc(typesz);
// Blocking read-back of buffer, copied into the local vector `res`
// (presumably returned by lines not shown here).
29931 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
29933 std::vector<double> res = std::vector<double>();
29934 res.assign(result, result+sz);
// Release the queue, the three device buffers and the kernel event.
29936 clReleaseCommandQueue (queue);
29937 clReleaseMemObject(buffer);
29938 clReleaseMemObject(buffer2);
29939 clReleaseMemObject(buffer3);
29940 clReleaseEvent(gpuExec);
// Excerpt of an OpenCL kernel-launch routine; its signature and several
// intermediate lines (conditions, braces) are not visible in this listing.
// Visible flow: size three host vectors, create one device buffer per vector,
// bind the buffers as kernel arguments 0-2, upload the host data, run the
// kernel, read the first buffer back into *v, release OpenCL objects.
// Element counts: v is accessed through a pointer, v2 and v3 with `.`.
29947 size_t sz = v->size();
29948 size_t sz2 = v2.size();
29949 size_t sz3 = v3.size();
// Byte sizes, computed with double / double / char element widths respectively.
29950 size_t typesz =
sizeof(double) * sz;
29951 size_t typesz2 =
sizeof(double) * sz2;
29952 size_t typesz3 =
sizeof(char) * sz3;
// Number of entries in params->buffers_size, or 0 when params is NULL.
29953 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
// Warnings about the number of custom buffer sizes; presumably guarded by
// checks on temp_sz that are not visible in this listing.
29957 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
29964 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
// One device buffer per host vector. Each call overwrites `error`; no check
// of `error` is visible in this listing.
// NOTE(review): buffer2/buffer3 are CL_MEM_WRITE_ONLY yet receive host input
// below, so the kernel presumably reads them; confirm the intended flag.
29968 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
29970 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
29972 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
// Bind the buffers as kernel arguments 0, 1 and 2 (return codes are ignored).
29974 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
29975 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
29976 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
// Command queue on the first entry of deviceIds, with no properties (NULL).
29977 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
// Blocking (CL_TRUE) uploads of the host data; return codes are ignored.
// at(0) throws std::out_of_range when *v is empty.
29979 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
29981 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
29983 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
// Global work size per dimension defaults to the three element counts.
29986 size_t size[3] = {sz, sz2, sz3};
29987 size_t work_dimension = 3;
// 1-D launch when there are no params, or when multi_dimensional is false and temp_sz is 0.
// NOTE(review): the warnings below talk about global_work_size, so temp_sz was
// presumably reassigned in lines not shown here - confirm.
29990 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
29991 work_dimension = 1;
29993 else if(temp_sz > 0){
29995 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 29996 For default multidimensional global work size, leave the global_work_size vector empty, \ 29997 and set multi_dimensional to true. Setting the global work size based on the values inside \ 29998 the global_work_size vector.");
30002 work_dimension = 1;
30004 else if (temp_sz == 2){
30007 work_dimension = 2;
30014 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
// Launch with no explicit local work size (NULL) and wait on the kernel's event.
30021 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30023 clWaitForEvents(1, &gpuExec);
// NOTE(review): the kernel is enqueued and waited on a second time with the
// same arguments, and gpuExec is overwritten with no visible release of the
// first event - confirm whether the second launch is intended.
30025 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30027 clWaitForEvents(1, &gpuExec);
// Host staging buffer for the first vector.
// NOTE(review): no NULL check or free() for this allocation is visible here.
30029 double *result = (
double *) malloc(typesz);
// Blocking read-back of buffer, then copy sz doubles into *v.
30030 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30032 v->assign(result, result+sz);
// Release the queue, the three device buffers and the kernel event.
30034 clReleaseCommandQueue (queue);
30035 clReleaseMemObject(buffer);
30036 clReleaseMemObject(buffer2);
30037 clReleaseMemObject(buffer3);
30038 clReleaseEvent(gpuExec);
30043 size_t sz = v->size();
30044 size_t sz2 = v2->size();
30045 size_t sz3 = v3.size();
30046 size_t typesz =
sizeof(double) * sz;
30047 size_t typesz2 =
sizeof(double) * sz2;
30048 size_t typesz3 =
sizeof(char) * sz3;
30049 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
30053 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30060 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30064 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30066 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
30068 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
30070 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
30071 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
30072 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
30073 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
30075 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
30077 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
30079 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
30082 size_t size[3] = {sz, sz2, sz3};
30083 size_t work_dimension = 3;
30086 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
30087 work_dimension = 1;
30089 else if(temp_sz > 0){
30091 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 30092 For default multidimensional global work size, leave the global_work_size vector empty, \ 30093 and set multi_dimensional to true. Setting the global work size based on the values inside \ 30094 the global_work_size vector.");
30098 work_dimension = 1;
30100 else if (temp_sz == 2){
30103 work_dimension = 2;
30110 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30117 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30119 clWaitForEvents(1, &gpuExec);
30121 double *result = (
double *) malloc(typesz);
30122 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30124 v->assign(result, result+sz);
30126 if (typesz2 != typesz or sz != sz2){
30128 result2 = (
double *) malloc(typesz2);
30129 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
30131 v2->assign(result2, result2+sz2);
30135 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
30137 v2->assign(result, result+sz2);
30140 clReleaseCommandQueue (queue);
30141 clReleaseMemObject(buffer);
30142 clReleaseMemObject(buffer2);
30143 clReleaseMemObject(buffer3);
30144 clReleaseEvent(gpuExec);
30149 size_t sz = v->size();
30150 size_t sz2 = v2->size();
30151 size_t sz3 = v3->size();
30152 size_t typesz =
sizeof(double) * sz;
30153 size_t typesz2 =
sizeof(double) * sz2;
30154 size_t typesz3 =
sizeof(char) * sz3;
30155 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
30159 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30166 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30170 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30172 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
30174 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
30176 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
30177 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
30178 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
30179 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
30181 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
30183 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
30185 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
30188 size_t size[3] = {sz, sz2, sz3};
30189 size_t work_dimension = 3;
30192 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
30193 work_dimension = 1;
30195 else if(temp_sz > 0){
30197 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 30198 For default multidimensional global work size, leave the global_work_size vector empty, \ 30199 and set multi_dimensional to true. Setting the global work size based on the values inside \ 30200 the global_work_size vector.");
30204 work_dimension = 1;
30206 else if (temp_sz == 2){
30209 work_dimension = 2;
30216 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30223 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30225 clWaitForEvents(1, &gpuExec);
30227 double *result = (
double *) malloc(typesz);
30228 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30230 v->assign(result, result+sz);
30232 if (typesz2 != typesz or sz != sz2){
30234 result2 = (
double *) malloc(typesz2);
30235 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
30237 v2->assign(result2, result2+sz2);
30241 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
30243 v2->assign(result, result+sz2);
30246 if (typesz3 != typesz or sz != sz3){
30248 result3 = (
char *) malloc(typesz3);
30249 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
30251 v3->assign(result3, result3+sz3);
30255 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
30257 v3->assign(result, result+sz3);
30260 clReleaseCommandQueue (queue);
30261 clReleaseMemObject(buffer);
30262 clReleaseMemObject(buffer2);
30263 clReleaseMemObject(buffer3);
30264 clReleaseEvent(gpuExec);
30270 size_t sz = v.size();
30271 size_t sz2 = v2.size();
30272 size_t sz3 = v3.size();
30273 size_t typesz =
sizeof(double) * sz;
30274 size_t typesz2 =
sizeof(double) * sz2;
30275 size_t typesz3 =
sizeof(int) * sz3;
30276 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
30280 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30287 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30291 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30293 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
30295 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
30297 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
30298 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
30299 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
30300 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
30302 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
30304 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
30306 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
30309 size_t size[3] = {sz, sz2, sz3};
30310 size_t work_dimension = 3;
30313 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
30314 work_dimension = 1;
30316 else if(temp_sz > 0){
30318 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 30319 For default multidimensional global work size, leave the global_work_size vector empty, \ 30320 and set multi_dimensional to true. Setting the global work size based on the values inside \ 30321 the global_work_size vector.");
30325 work_dimension = 1;
30327 else if (temp_sz == 2){
30330 work_dimension = 2;
30337 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30344 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30346 clWaitForEvents(1, &gpuExec);
30348 double *result = (
double *) malloc(typesz);
30349 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30351 std::vector<double> res = std::vector<double>();
30352 res.assign(result, result+sz);
30354 clReleaseCommandQueue (queue);
30355 clReleaseMemObject(buffer);
30356 clReleaseMemObject(buffer2);
30357 clReleaseMemObject(buffer3);
30358 clReleaseEvent(gpuExec);
30365 size_t sz = v->size();
30366 size_t sz2 = v2.size();
30367 size_t sz3 = v3.size();
30368 size_t typesz =
sizeof(double) * sz;
30369 size_t typesz2 =
sizeof(double) * sz2;
30370 size_t typesz3 =
sizeof(int) * sz3;
30371 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
30375 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30382 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30386 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30388 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
30390 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
30392 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
30393 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
30394 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
30395 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
30397 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
30399 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
30401 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
30404 size_t size[3] = {sz, sz2, sz3};
30405 size_t work_dimension = 3;
30408 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
30409 work_dimension = 1;
30411 else if(temp_sz > 0){
30413 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 30414 For default multidimensional global work size, leave the global_work_size vector empty, \ 30415 and set multi_dimensional to true. Setting the global work size based on the values inside \ 30416 the global_work_size vector.");
30420 work_dimension = 1;
30422 else if (temp_sz == 2){
30425 work_dimension = 2;
30432 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30439 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30441 clWaitForEvents(1, &gpuExec);
30443 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30445 clWaitForEvents(1, &gpuExec);
30447 double *result = (
double *) malloc(typesz);
30448 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30450 v->assign(result, result+sz);
30452 clReleaseCommandQueue (queue);
30453 clReleaseMemObject(buffer);
30454 clReleaseMemObject(buffer2);
30455 clReleaseMemObject(buffer3);
30456 clReleaseEvent(gpuExec);
30461 size_t sz = v->size();
30462 size_t sz2 = v2->size();
30463 size_t sz3 = v3.size();
30464 size_t typesz =
sizeof(double) * sz;
30465 size_t typesz2 =
sizeof(double) * sz2;
30466 size_t typesz3 =
sizeof(int) * sz3;
30467 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
30471 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30478 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30482 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30484 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
30486 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
30488 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
30489 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
30490 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
30491 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
30493 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
30495 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
30497 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
30500 size_t size[3] = {sz, sz2, sz3};
30501 size_t work_dimension = 3;
30504 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
30505 work_dimension = 1;
30507 else if(temp_sz > 0){
30509 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 30510 For default multidimensional global work size, leave the global_work_size vector empty, \ 30511 and set multi_dimensional to true. Setting the global work size based on the values inside \ 30512 the global_work_size vector.");
30516 work_dimension = 1;
30518 else if (temp_sz == 2){
30521 work_dimension = 2;
30528 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30535 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30537 clWaitForEvents(1, &gpuExec);
30539 double *result = (
double *) malloc(typesz);
30540 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30542 v->assign(result, result+sz);
30544 if (typesz2 != typesz or sz != sz2){
30546 result2 = (
double *) malloc(typesz2);
30547 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
30549 v2->assign(result2, result2+sz2);
30553 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
30555 v2->assign(result, result+sz2);
30558 clReleaseCommandQueue (queue);
30559 clReleaseMemObject(buffer);
30560 clReleaseMemObject(buffer2);
30561 clReleaseMemObject(buffer3);
30562 clReleaseEvent(gpuExec);
30567 size_t sz = v->size();
30568 size_t sz2 = v2->size();
30569 size_t sz3 = v3->size();
30570 size_t typesz =
sizeof(double) * sz;
30571 size_t typesz2 =
sizeof(double) * sz2;
30572 size_t typesz3 =
sizeof(int) * sz3;
30573 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
30577 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30584 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30588 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30590 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
30592 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
30594 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
30595 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
30596 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
30597 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
30599 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
30601 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
30603 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
30606 size_t size[3] = {sz, sz2, sz3};
30607 size_t work_dimension = 3;
30610 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
30611 work_dimension = 1;
30613 else if(temp_sz > 0){
30615 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 30616 For default multidimensional global work size, leave the global_work_size vector empty, \ 30617 and set multi_dimensional to true. Setting the global work size based on the values inside \ 30618 the global_work_size vector.");
30622 work_dimension = 1;
30624 else if (temp_sz == 2){
30627 work_dimension = 2;
30634 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30641 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30643 clWaitForEvents(1, &gpuExec);
30645 double *result = (
double *) malloc(typesz);
30646 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30648 v->assign(result, result+sz);
30650 if (typesz2 != typesz or sz != sz2){
30652 result2 = (
double *) malloc(typesz2);
30653 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
30655 v2->assign(result2, result2+sz2);
30659 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
30661 v2->assign(result, result+sz2);
30664 if (typesz3 != typesz or sz != sz3){
30666 result3 = (
int *) malloc(typesz3);
30667 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
30669 v3->assign(result3, result3+sz3);
30673 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
30675 v3->assign(result, result+sz3);
30678 clReleaseCommandQueue (queue);
30679 clReleaseMemObject(buffer);
30680 clReleaseMemObject(buffer2);
30681 clReleaseMemObject(buffer3);
30682 clReleaseEvent(gpuExec);
30688 size_t sz = v.size();
30689 size_t sz2 = v2.size();
30690 size_t sz3 = v3.size();
30691 size_t typesz =
sizeof(double) * sz;
30692 size_t typesz2 =
sizeof(double) * sz2;
30693 size_t typesz3 =
sizeof(float) * sz3;
30694 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
30698 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30705 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30709 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30711 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
30713 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
30715 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
30716 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
30717 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
30718 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
30720 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
30722 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
30724 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
30727 size_t size[3] = {sz, sz2, sz3};
30728 size_t work_dimension = 3;
30731 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
30732 work_dimension = 1;
30734 else if(temp_sz > 0){
30736 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 30737 For default multidimensional global work size, leave the global_work_size vector empty, \ 30738 and set multi_dimensional to true. Setting the global work size based on the values inside \ 30739 the global_work_size vector.");
30743 work_dimension = 1;
30745 else if (temp_sz == 2){
30748 work_dimension = 2;
30755 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30762 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30764 clWaitForEvents(1, &gpuExec);
30766 double *result = (
double *) malloc(typesz);
30767 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30769 std::vector<double> res = std::vector<double>();
30770 res.assign(result, result+sz);
30772 clReleaseCommandQueue (queue);
30773 clReleaseMemObject(buffer);
30774 clReleaseMemObject(buffer2);
30775 clReleaseMemObject(buffer3);
30776 clReleaseEvent(gpuExec);
30783 size_t sz = v->size();
30784 size_t sz2 = v2.size();
30785 size_t sz3 = v3.size();
30786 size_t typesz =
sizeof(double) * sz;
30787 size_t typesz2 =
sizeof(double) * sz2;
30788 size_t typesz3 =
sizeof(float) * sz3;
30789 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
30793 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30800 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30804 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30806 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
30808 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
30810 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
30811 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
30812 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
30813 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
30815 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
30817 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
30819 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
30822 size_t size[3] = {sz, sz2, sz3};
30823 size_t work_dimension = 3;
30826 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
30827 work_dimension = 1;
30829 else if(temp_sz > 0){
30831 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 30832 For default multidimensional global work size, leave the global_work_size vector empty, \ 30833 and set multi_dimensional to true. Setting the global work size based on the values inside \ 30834 the global_work_size vector.");
30838 work_dimension = 1;
30840 else if (temp_sz == 2){
30843 work_dimension = 2;
30850 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30857 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30859 clWaitForEvents(1, &gpuExec);
30861 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30863 clWaitForEvents(1, &gpuExec);
30865 double *result = (
double *) malloc(typesz);
30866 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30868 v->assign(result, result+sz);
30870 clReleaseCommandQueue (queue);
30871 clReleaseMemObject(buffer);
30872 clReleaseMemObject(buffer2);
30873 clReleaseMemObject(buffer3);
30874 clReleaseEvent(gpuExec);
30879 size_t sz = v->size();
30880 size_t sz2 = v2->size();
30881 size_t sz3 = v3.size();
30882 size_t typesz =
sizeof(double) * sz;
30883 size_t typesz2 =
sizeof(double) * sz2;
30884 size_t typesz3 =
sizeof(float) * sz3;
30885 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
30889 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
30896 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
30900 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
30902 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
30904 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
30906 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
30907 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
30908 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
30909 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
30911 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
30913 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
30915 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
30918 size_t size[3] = {sz, sz2, sz3};
30919 size_t work_dimension = 3;
30922 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
30923 work_dimension = 1;
30925 else if(temp_sz > 0){
30927 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 30928 For default multidimensional global work size, leave the global_work_size vector empty, \ 30929 and set multi_dimensional to true. Setting the global work size based on the values inside \ 30930 the global_work_size vector.");
30934 work_dimension = 1;
30936 else if (temp_sz == 2){
30939 work_dimension = 2;
30946 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
30953 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
30955 clWaitForEvents(1, &gpuExec);
30957 double *result = (
double *) malloc(typesz);
30958 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
30960 v->assign(result, result+sz);
30962 if (typesz2 != typesz or sz != sz2){
30964 result2 = (
double *) malloc(typesz2);
30965 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
30967 v2->assign(result2, result2+sz2);
30971 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
30973 v2->assign(result, result+sz2);
30976 clReleaseCommandQueue (queue);
30977 clReleaseMemObject(buffer);
30978 clReleaseMemObject(buffer2);
30979 clReleaseMemObject(buffer3);
30980 clReleaseEvent(gpuExec);
30985 size_t sz = v->size();
30986 size_t sz2 = v2->size();
30987 size_t sz3 = v3->size();
30988 size_t typesz =
sizeof(double) * sz;
30989 size_t typesz2 =
sizeof(double) * sz2;
30990 size_t typesz3 =
sizeof(float) * sz3;
30991 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
30995 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
31002 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
31006 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
31008 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
31010 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
31012 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
31013 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
31014 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
31015 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
31017 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
31019 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
31021 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
31024 size_t size[3] = {sz, sz2, sz3};
31025 size_t work_dimension = 3;
31028 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
31029 work_dimension = 1;
31031 else if(temp_sz > 0){
31033 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 31034 For default multidimensional global work size, leave the global_work_size vector empty, \ 31035 and set multi_dimensional to true. Setting the global work size based on the values inside \ 31036 the global_work_size vector.");
31040 work_dimension = 1;
31042 else if (temp_sz == 2){
31045 work_dimension = 2;
31052 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
31059 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
31061 clWaitForEvents(1, &gpuExec);
31063 double *result = (
double *) malloc(typesz);
31064 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
31066 v->assign(result, result+sz);
31068 if (typesz2 != typesz or sz != sz2){
31070 result2 = (
double *) malloc(typesz2);
31071 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
31073 v2->assign(result2, result2+sz2);
31077 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
31079 v2->assign(result, result+sz2);
31082 if (typesz3 != typesz or sz != sz3){
31084 result3 = (
float *) malloc(typesz3);
31085 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
31087 v3->assign(result3, result3+sz3);
31091 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
31093 v3->assign(result, result+sz3);
31096 clReleaseCommandQueue (queue);
31097 clReleaseMemObject(buffer);
31098 clReleaseMemObject(buffer2);
31099 clReleaseMemObject(buffer3);
31100 clReleaseEvent(gpuExec);
31106 size_t sz = v.size();
31107 size_t sz2 = v2.size();
31108 size_t sz3 = v3.size();
31109 size_t typesz =
sizeof(double) * sz;
31110 size_t typesz2 =
sizeof(double) * sz2;
31111 size_t typesz3 =
sizeof(double) * sz3;
31112 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
31116 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
31123 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
31127 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
31129 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
31131 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
31133 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
31134 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
31135 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
31136 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
31138 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v[0], 0, NULL, NULL);
31140 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
31142 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
31145 size_t size[3] = {sz, sz2, sz3};
31146 size_t work_dimension = 3;
31149 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
31150 work_dimension = 1;
31152 else if(temp_sz > 0){
31154 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 31155 For default multidimensional global work size, leave the global_work_size vector empty, \ 31156 and set multi_dimensional to true. Setting the global work size based on the values inside \ 31157 the global_work_size vector.");
31161 work_dimension = 1;
31163 else if (temp_sz == 2){
31166 work_dimension = 2;
31173 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
31180 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
31182 clWaitForEvents(1, &gpuExec);
31184 double *result = (
double *) malloc(typesz);
31185 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
31187 std::vector<double> res = std::vector<double>();
31188 res.assign(result, result+sz);
31190 clReleaseCommandQueue (queue);
31191 clReleaseMemObject(buffer);
31192 clReleaseMemObject(buffer2);
31193 clReleaseMemObject(buffer3);
31194 clReleaseEvent(gpuExec);
31201 size_t sz = v->size();
31202 size_t sz2 = v2.size();
31203 size_t sz3 = v3.size();
31204 size_t typesz =
sizeof(double) * sz;
31205 size_t typesz2 =
sizeof(double) * sz2;
31206 size_t typesz3 =
sizeof(double) * sz3;
31207 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
31211 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
31218 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
31222 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
31224 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz2, NULL, &error);
31226 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
31228 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
31229 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
31230 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
31231 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
31233 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
31235 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2[0], 0, NULL, NULL);
31237 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
31240 size_t size[3] = {sz, sz2, sz3};
31241 size_t work_dimension = 3;
31244 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
31245 work_dimension = 1;
31247 else if(temp_sz > 0){
31249 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 31250 For default multidimensional global work size, leave the global_work_size vector empty, \ 31251 and set multi_dimensional to true. Setting the global work size based on the values inside \ 31252 the global_work_size vector.");
31256 work_dimension = 1;
31258 else if (temp_sz == 2){
31261 work_dimension = 2;
31268 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
31275 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
31277 clWaitForEvents(1, &gpuExec);
31279 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
31281 clWaitForEvents(1, &gpuExec);
31283 double *result = (
double *) malloc(typesz);
31284 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
31286 v->assign(result, result+sz);
31288 clReleaseCommandQueue (queue);
31289 clReleaseMemObject(buffer);
31290 clReleaseMemObject(buffer2);
31291 clReleaseMemObject(buffer3);
31292 clReleaseEvent(gpuExec);
31297 size_t sz = v->size();
31298 size_t sz2 = v2->size();
31299 size_t sz3 = v3.size();
31300 size_t typesz =
sizeof(double) * sz;
31301 size_t typesz2 =
sizeof(double) * sz2;
31302 size_t typesz3 =
sizeof(double) * sz3;
31303 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
31307 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
31314 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
31318 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
31320 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
31322 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_WRITE_ONLY, typesz3, NULL, &error);
31324 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
31325 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
31326 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
31327 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
31329 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
31331 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
31333 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3[0], 0, NULL, NULL);
31336 size_t size[3] = {sz, sz2, sz3};
31337 size_t work_dimension = 3;
31340 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
31341 work_dimension = 1;
31343 else if(temp_sz > 0){
31345 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 31346 For default multidimensional global work size, leave the global_work_size vector empty, \ 31347 and set multi_dimensional to true. Setting the global work size based on the values inside \ 31348 the global_work_size vector.");
31352 work_dimension = 1;
31354 else if (temp_sz == 2){
31357 work_dimension = 2;
31364 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
31371 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
31373 clWaitForEvents(1, &gpuExec);
31375 double *result = (
double *) malloc(typesz);
31376 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
31378 v->assign(result, result+sz);
31380 if (typesz2 != typesz or sz != sz2){
31382 result2 = (
double *) malloc(typesz2);
31383 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
31385 v2->assign(result2, result2+sz2);
31389 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
31391 v2->assign(result, result+sz2);
31394 clReleaseCommandQueue (queue);
31395 clReleaseMemObject(buffer);
31396 clReleaseMemObject(buffer2);
31397 clReleaseMemObject(buffer3);
31398 clReleaseEvent(gpuExec);
31403 size_t sz = v->size();
31404 size_t sz2 = v2->size();
31405 size_t sz3 = v3->size();
31406 size_t typesz =
sizeof(double) * sz;
31407 size_t typesz2 =
sizeof(double) * sz2;
31408 size_t typesz3 =
sizeof(double) * sz3;
31409 size_t temp_sz = params != NULL ? params->
buffers_size.size() : 0;
31413 ROS_WARN(
"buffer_size includes more than three elements. Exactly three are needed. Using the first three...");
31420 ROS_WARN(
"buffer_size includes less than three elements. Exactly three are needed for custom buffer sizes. Using default values...");
31424 cl_mem buffer = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz, NULL, &error);
31426 cl_mem buffer2 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz2, NULL, &error);
31428 cl_mem buffer3 = clCreateBuffer(
context, CL_MEM_READ_WRITE, typesz3, NULL, &error);
31430 clSetKernelArg (
kernel, 0,
sizeof (cl_mem), &buffer);
31431 clSetKernelArg (
kernel, 1,
sizeof (cl_mem), &buffer2);
31432 clSetKernelArg (
kernel, 2,
sizeof (cl_mem), &buffer3);
31433 cl_command_queue queue = clCreateCommandQueueWithProperties (
context,
deviceIds [0], NULL, &error);
31435 clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, typesz, &v->at(0), 0, NULL, NULL);
31437 clEnqueueWriteBuffer(queue, buffer2, CL_TRUE, 0, typesz2, &v2->at(0), 0, NULL, NULL);
31439 clEnqueueWriteBuffer(queue, buffer3, CL_TRUE, 0, typesz3, &v3->at(0), 0, NULL, NULL);
31442 size_t size[3] = {sz, sz2, sz3};
31443 size_t work_dimension = 3;
31446 if (params == NULL or (params != NULL and not(params->
multi_dimensional or temp_sz > 0))){
31447 work_dimension = 1;
31449 else if(temp_sz > 0){
31451 ROS_WARN(
"multi_dimensional should be set to true without pushing to global_work_size. \ 31452 For default multidimensional global work size, leave the global_work_size vector empty, \ 31453 and set multi_dimensional to true. Setting the global work size based on the values inside \ 31454 the global_work_size vector.");
31458 work_dimension = 1;
31460 else if (temp_sz == 2){
31463 work_dimension = 2;
31470 ROS_WARN(
"global_work_size includes more than three elements. A maximum of three is allowed. Using the first three...");
31477 checkError (clEnqueueNDRangeKernel (queue,
kernel, work_dimension, NULL, size, NULL, 0, NULL, &gpuExec));
31479 clWaitForEvents(1, &gpuExec);
31481 double *result = (
double *) malloc(typesz);
31482 checkError(clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, typesz, result, 0, NULL, NULL));
31484 v->assign(result, result+sz);
31486 if (typesz2 != typesz or sz != sz2){
31488 result2 = (
double *) malloc(typesz2);
31489 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result2, 0, NULL, NULL));
31491 v2->assign(result2, result2+sz2);
31495 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
31497 v2->assign(result, result+sz2);
31500 if (typesz3 != typesz or sz != sz3){
31502 result3 = (
double *) malloc(typesz3);
31503 checkError(clEnqueueReadBuffer(queue, buffer3, CL_TRUE, 0, typesz3, result3, 0, NULL, NULL));
31505 v3->assign(result3, result3+sz3);
31509 checkError(clEnqueueReadBuffer(queue, buffer2, CL_TRUE, 0, typesz2, result, 0, NULL, NULL));
31511 v3->assign(result, result+sz3);
31514 clReleaseCommandQueue (queue);
31515 clReleaseMemObject(buffer);
31516 clReleaseMemObject(buffer2);
31517 clReleaseMemObject(buffer3);
31518 clReleaseEvent(gpuExec);
void checkError(const cl_int error)
The function that generates ROS_WARN messages based on OpenCL error codes.
std::string LoadKernel(const char *name)
The function that loads the OpenCL kernel.
std::string getPlatformName(const cl_platform_id id)
Gets the platform name based on its ID.
std::string getDeviceName(const cl_device_id id)
Gets the device name based on its ID.
ROS_OpenCL()
Default (and empty) constructor.
~ROS_OpenCL()
Destroys a ROS_OpenCL object.
void clean()
The function that clears all allocated memory for OpenCL objects.
The ROS_OpenCL_Params helper class.
sensor_msgs::PointCloud2 process(const sensor_msgs::PointCloud2 &msg)
The function that initiates kernel processing.
cl_program createProgram(const std::string &source, const cl_context context)
Creates an OpenCL program object.
std::vector< cl_device_id > deviceIds
std::vector< size_t > global_work_size
ROS_OpenCL operator=(ROS_OpenCL *s)
Operator= overload.
std::vector< size_t > buffers_size